├── .github
└── workflows
│ ├── release.yml
│ └── test.yml
├── .gitignore
├── LICENSE
├── README.md
├── poetry.lock
├── print-coverage.py
├── pyproject.toml
├── sigma
├── backends
│ ├── kusto
│ │ ├── __init__.py
│ │ └── kusto.py
│ └── microsoft365defender
│ │ ├── __init__.py
│ │ └── microsoft365defender.py
└── pipelines
│ ├── azuremonitor
│ ├── __init__.py
│ ├── azuremonitor.py
│ ├── mappings.py
│ ├── schema.py
│ ├── tables.py
│ └── transformations.py
│ ├── kusto_common
│ ├── __init__.py
│ ├── conditions.py
│ ├── errors.py
│ ├── finalization.py
│ ├── mappings.py
│ ├── postprocessing.py
│ ├── schema.py
│ └── transformations.py
│ ├── microsoft365defender
│ ├── __init__.py
│ └── microsoft365defender.py
│ ├── microsoftxdr
│ ├── __init__.py
│ ├── mappings.py
│ ├── microsoftxdr.py
│ ├── schema.py
│ ├── tables.py
│ └── transformations.py
│ └── sentinelasim
│ ├── __init__.py
│ ├── mappings.py
│ ├── schema.py
│ ├── sentinelasim.py
│ ├── tables.py
│ └── transformations.py
├── tests
├── test_backend_kusto.py
├── test_pipelines_azuremonitor.py
├── test_pipelines_microsoftxdr.py
└── test_pipelines_sentinelasim.py
└── utils
├── get_azure_monitor_schema_tables.py
├── get_microsoft_xdr_schema_tables.py
└── get_sentinel_asim_schema_tables.py
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release to PyPI
2 | on:
3 | release:
4 | types: [published]
5 | push:
6 | tags:
7 | - v*.*.*
8 |
9 | jobs:
10 | build-and-publish:
11 | runs-on: ubuntu-20.04
12 | steps:
13 | - uses: actions/checkout@v3
14 | - name: Install Poetry
15 | run: pipx install poetry
16 | - name: Set up Python
17 | uses: actions/setup-python@v4
18 | with:
19 | python-version: 3.8
20 | cache: poetry
21 | - name: Verify versioning
22 | run: |
23 | [ "$(poetry version -s)" == "${GITHUB_REF#refs/tags/v}" ]
24 | - name: Install dependencies
25 | run: poetry install
26 | - name: Run tests
27 | run: poetry run pytest
28 | - name: Build packages
29 | run: poetry build
30 | - name: Configure Poetry
31 | run: |
32 | poetry config repositories.testpypi https://test.pypi.org/legacy/
33 | poetry config pypi-token.testpypi ${{ secrets.TEST_PYPI_API_TOKEN }}
34 | poetry config pypi-token.pypi "${{ secrets.PYPI_API_TOKEN }}"
35 | #- name: Publish to test PyPI
36 | # if: ${{ github.event_name == 'push' }}
37 | # run: poetry publish -r testpypi
38 | - name: Publish to PyPI
39 | if: ${{ github.event_name == 'release' }}
40 | run: poetry publish
41 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 | on:
3 | push:
4 | branches: [ "*" ]
5 | pull_request:
6 | branches: [ "*" ]
7 | workflow_dispatch:
8 |
9 | jobs:
10 | test:
11 | strategy:
12 | matrix:
13 | os: [ 'ubuntu-20.04' ]
14 | python-version: [ '3.8', '3.9', '3.10', '3.11']
15 | runs-on: ${{ matrix.os }}
16 | steps:
17 | - uses: actions/checkout@v2
18 | - name: Install Poetry
19 | run: pipx install poetry
20 | - name: Set up Python
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: ${{ matrix.python-version }}
24 | cache: poetry
25 | - name: Install dependencies
26 | run: poetry install
27 | - name: Run tests
28 | run: poetry run pytest --cov=sigma --cov-report term --cov-report xml:cov.xml -vv
29 | - name: Store coverage for badge
30 | if: ${{ runner.os == 'Linux' }}
31 | run: poetry run python print-coverage.py >> $GITHUB_ENV
32 | - name: Create coverage badge
33 | if: ${{ github.repository == 'AttackIQ/pySigma-backend-microsoft365defender' && github.event_name == 'push' && runner.os == 'Linux' }}
34 | uses: schneegans/dynamic-badges-action@v1.1.0
35 | with:
36 | auth: ${{ secrets.GIST_SECRET }}
37 | gistID: 9c0879725c7f94387801390bbb0ac8d6
38 | filename: slincoln-aiq-pySigma-backend-microsoft365defender.json
39 | label: Coverage
40 | message: ${{ env.COVERAGE }}
41 | color: ${{ env.COVERAGE_COLOR }}
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .coverage*
2 | .vscode/
3 | **/__pycache__
4 | .pytest_cache/
5 | cov.xml
6 | dist/
7 | docs/_build
8 | .env
9 | .venv/
10 | .python-version
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | GNU LESSER GENERAL PUBLIC LICENSE
3 | Version 3, 29 June 2007
4 |
5 | Copyright (C) 2007 Free Software Foundation, Inc.
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 |
10 | This version of the GNU Lesser General Public License incorporates
11 | the terms and conditions of version 3 of the GNU General Public
12 | License, supplemented by the additional permissions listed below.
13 |
14 | 0. Additional Definitions.
15 |
16 | As used herein, "this License" refers to version 3 of the GNU Lesser
17 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
18 | General Public License.
19 |
20 | "The Library" refers to a covered work governed by this License,
21 | other than an Application or a Combined Work as defined below.
22 |
23 | An "Application" is any work that makes use of an interface provided
24 | by the Library, but which is not otherwise based on the Library.
25 | Defining a subclass of a class defined by the Library is deemed a mode
26 | of using an interface provided by the Library.
27 |
28 | A "Combined Work" is a work produced by combining or linking an
29 | Application with the Library. The particular version of the Library
30 | with which the Combined Work was made is also called the "Linked
31 | Version".
32 |
33 | The "Minimal Corresponding Source" for a Combined Work means the
34 | Corresponding Source for the Combined Work, excluding any source code
35 | for portions of the Combined Work that, considered in isolation, are
36 | based on the Application, and not on the Linked Version.
37 |
38 | The "Corresponding Application Code" for a Combined Work means the
39 | object code and/or source code for the Application, including any data
40 | and utility programs needed for reproducing the Combined Work from the
41 | Application, but excluding the System Libraries of the Combined Work.
42 |
43 | 1. Exception to Section 3 of the GNU GPL.
44 |
45 | You may convey a covered work under sections 3 and 4 of this License
46 | without being bound by section 3 of the GNU GPL.
47 |
48 | 2. Conveying Modified Versions.
49 |
50 | If you modify a copy of the Library, and, in your modifications, a
51 | facility refers to a function or data to be supplied by an Application
52 | that uses the facility (other than as an argument passed when the
53 | facility is invoked), then you may convey a copy of the modified
54 | version:
55 |
56 | a) under this License, provided that you make a good faith effort to
57 | ensure that, in the event an Application does not supply the
58 | function or data, the facility still operates, and performs
59 | whatever part of its purpose remains meaningful, or
60 |
61 | b) under the GNU GPL, with none of the additional permissions of
62 | this License applicable to that copy.
63 |
64 | 3. Object Code Incorporating Material from Library Header Files.
65 |
66 | The object code form of an Application may incorporate material from
67 | a header file that is part of the Library. You may convey such object
68 | code under terms of your choice, provided that, if the incorporated
69 | material is not limited to numerical parameters, data structure
70 | layouts and accessors, or small macros, inline functions and templates
71 | (ten or fewer lines in length), you do both of the following:
72 |
73 | a) Give prominent notice with each copy of the object code that the
74 | Library is used in it and that the Library and its use are
75 | covered by this License.
76 |
77 | b) Accompany the object code with a copy of the GNU GPL and this license
78 | document.
79 |
80 | 4. Combined Works.
81 |
82 | You may convey a Combined Work under terms of your choice that,
83 | taken together, effectively do not restrict modification of the
84 | portions of the Library contained in the Combined Work and reverse
85 | engineering for debugging such modifications, if you also do each of
86 | the following:
87 |
88 | a) Give prominent notice with each copy of the Combined Work that
89 | the Library is used in it and that the Library and its use are
90 | covered by this License.
91 |
92 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
93 | document.
94 |
95 | c) For a Combined Work that displays copyright notices during
96 | execution, include the copyright notice for the Library among
97 | these notices, as well as a reference directing the user to the
98 | copies of the GNU GPL and this license document.
99 |
100 | d) Do one of the following:
101 |
102 | 0) Convey the Minimal Corresponding Source under the terms of this
103 | License, and the Corresponding Application Code in a form
104 | suitable for, and under terms that permit, the user to
105 | recombine or relink the Application with a modified version of
106 | the Linked Version to produce a modified Combined Work, in the
107 | manner specified by section 6 of the GNU GPL for conveying
108 | Corresponding Source.
109 |
110 | 1) Use a suitable shared library mechanism for linking with the
111 | Library. A suitable mechanism is one that (a) uses at run time
112 | a copy of the Library already present on the user's computer
113 | system, and (b) will operate properly with a modified version
114 | of the Library that is interface-compatible with the Linked
115 | Version.
116 |
117 | e) Provide Installation Information, but only if you would otherwise
118 | be required to provide such information under section 6 of the
119 | GNU GPL, and only to the extent that such information is
120 | necessary to install and execute a modified version of the
121 | Combined Work produced by recombining or relinking the
122 | Application with a modified version of the Linked Version. (If
123 | you use option 4d0, the Installation Information must accompany
124 | the Minimal Corresponding Source and Corresponding Application
125 | Code. If you use option 4d1, you must provide the Installation
126 | Information in the manner specified by section 6 of the GNU GPL
127 | for conveying Corresponding Source.)
128 |
129 | 5. Combined Libraries.
130 |
131 | You may place library facilities that are a work based on the
132 | Library side by side in a single library together with other library
133 | facilities that are not Applications and are not covered by this
134 | License, and convey such a combined library under terms of your
135 | choice, if you do both of the following:
136 |
137 | a) Accompany the combined library with a copy of the same work based
138 | on the Library, uncombined with any other library facilities,
139 | conveyed under the terms of this License.
140 |
141 | b) Give prominent notice with the combined library that part of it
142 | is a work based on the Library, and explaining where to find the
143 | accompanying uncombined form of the same work.
144 |
145 | 6. Revised Versions of the GNU Lesser General Public License.
146 |
147 | The Free Software Foundation may publish revised and/or new versions
148 | of the GNU Lesser General Public License from time to time. Such new
149 | versions will be similar in spirit to the present version, but may
150 | differ in detail to address new problems or concerns.
151 |
152 | Each version is given a distinguishing version number. If the
153 | Library as you received it specifies that a certain numbered version
154 | of the GNU Lesser General Public License "or any later version"
155 | applies to it, you have the option of following the terms and
156 | conditions either of that published version or of any later version
157 | published by the Free Software Foundation. If the Library as you
158 | received it does not specify a version number of the GNU Lesser
159 | General Public License, you may choose any version of the GNU Lesser
160 | General Public License ever published by the Free Software Foundation.
161 |
162 | If the Library as you received it specifies that a proxy can decide
163 | whether future versions of the GNU Lesser General Public License shall
164 | apply, that proxy's public statement of acceptance of any version is
165 | permanent authorization for you to choose that version for the
166 | Library.
167 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pySigma Kusto Query Language (KQL) Backend
2 |
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
9 | 
10 |
11 | ## Contents
12 |
13 | - [pySigma Kusto Query Language (KQL) Backend](#pysigma-kusto-query-language-kql-backend)
14 | - [📖 Overview](#-overview)
15 | - [🔑 Key Features](#-key-features)
16 | - [🧑💻 Maintainer](#-maintainer)
17 | - [🚀 Quick Start](#-quick-start)
18 | - [📘 Usage](#-usage)
19 | - [🖥️ sigma-cli](#️-sigma-cli)
20 | - [🐍 Python Script](#-python-script)
21 | - [🛠️ Advanced Features](#️-advanced-features)
22 | - [🔄 Pipeline \& Backend Args (New in 0.2.0)](#-pipeline--backend-args-new-in-020)
23 | - [🗃️ Custom Table Names (New in 0.3.0) (Beta)](#️-custom-table-names-new-in-030-beta)
24 | - [🔄 Processing Pipelines](#-processing-pipelines)
25 | - [📊 Rule Support](#-rule-support)
26 | - [🖥️ Commonly Supported Categories](#️-commonly-supported-categories)
27 | - [🧪 Custom Transformations](#-custom-transformations)
28 | - [📊 Custom Postprocessing Item](#-custom-postprocessing-item)
29 | - [❓Frequently Asked Questions](#frequently-asked-questions)
30 | - [How do I set the table name for a rule?](#how-do-i-set-the-table-name-for-a-rule)
31 | - [How do I set the table name for a rule in YAML?](#how-do-i-set-the-table-name-for-a-rule-in-yaml)
32 | - [How is the table name determined for a rule?](#how-is-the-table-name-determined-for-a-rule)
33 | - [How are field mappings determined for a rule?](#how-are-field-mappings-determined-for-a-rule)
34 | - [What tables are supported for each pipeline?](#what-tables-are-supported-for-each-pipeline)
35 | - [I am receiving an `Invalid SigmaDetectionItem field name encountered` error. What does this mean?](#i-am-receiving-an-invalid-sigmadetectionitem-field-name-encountered-error-what-does-this-mean)
36 | - [My query\_table or custom field mapping isn't working](#my-query_table-or-custom-field-mapping-isnt-working)
37 | - [🤝 Contributing](#-contributing)
38 | - [📄 License](#-license)
39 |
40 | ## 📖 Overview
41 |
42 | The **pySigma Kusto Backend** transforms Sigma Rules into queries using [Kusto Query Language (KQL)](https://learn.microsoft.com/en-us/kusto/query/?view=microsoft-fabric). This backend supports multiple Microsoft products, including:
43 |
44 | - [Microsoft XDR Advanced Hunting Queries](https://learn.microsoft.com/en-us/defender-xdr/advanced-hunting-overview) (Formerly Microsoft 365 Defender Advanced Hunting Queries)
45 | - [Azure Sentinel Advanced Security Information Model (ASIM) Queries](https://learn.microsoft.com/en-us/azure/sentinel/normalization)
46 | - [Azure Monitor Queries](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/get-started-queries)
47 |
48 | > **Note:** This backend was previously named **pySigma Microsoft 365 Defender Backend**.
49 |
50 | ### 🔑 Key Features
51 |
52 | - **Backend**: `sigma.backends.kusto` with `KustoBackend` class
53 | - **Pipelines**: Provides `microsoft_xdr_pipeline`, `sentinelasim_pipeline`, and `azure_monitor_pipeline` for query tables and field renames
54 | - **Output**: Query strings in Kusto Query Language (KQL)
55 |
56 | ### 🧑💻 Maintainer
57 |
58 | - [Stephen Lincoln](https://github.com/slincoln-aiq) via [AttackIQ](https://github.com/AttackIQ)
59 |
60 | ## 🚀 Quick Start
61 |
62 | 1. Install the package:
63 |
64 | ```bash
65 | pip install pysigma-backend-kusto
66 | ```
67 |
68 | > **Note:** This package requires `pySigma` version 0.10.0 or higher.
69 |
70 | 2. Convert a Sigma rule to a Microsoft XDR KQL query using sigma-cli:
71 |
72 | ```bash
73 | sigma convert -t kusto -p microsoft_xdr path/to/your/rule.yml
74 | ```
75 |
76 | 3. Or use in a Python script:
77 |
78 | ```python
79 | from sigma.rule import SigmaRule
80 |
81 | from sigma.backends.kusto import KustoBackend
82 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline
83 |
84 | # Load your Sigma rule
85 | rule = SigmaRule.from_yaml(
86 | """
87 | title: Mimikatz CommandLine
88 | status: test
89 | logsource:
90 | category: process_creation
91 | product: windows
92 | detection:
93 | sel:
94 | CommandLine|contains: mimikatz.exe
95 | condition: sel
96 | """
97 | )
98 |
99 | # Convert the rule
100 | xdr_pipeline = microsoft_xdr_pipeline()
101 | backend = KustoBackend(processing_pipeline=xdr_pipeline)
102 | print(backend.convert_rule(rule)[0])
103 |
104 | ```
105 |
106 | ## 📘 Usage
107 |
108 | ### 🖥️ sigma-cli
109 |
110 | Use with `sigma-cli` per [typical sigma-cli usage](https://github.com/SigmaHQ/sigma-cli#usage):
111 |
112 | ```bash
113 | sigma convert -t kusto -p microsoft_xdr -f default -s ~/sigma/rules
114 | ```
115 |
116 | ### 🐍 Python Script
117 |
118 | Use the backend and pipeline in a standalone Python script. Note, the backend automatically applies the pipeline, but
119 | you can manually add it if you would like.
120 |
121 | ```python
122 | from sigma.rule import SigmaRule
123 | from sigma.backends.kusto import KustoBackend
124 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline
125 |
126 | # Define an example rule as a YAML str
127 | sigma_rule = SigmaRule.from_yaml("""
128 | title: Mimikatz CommandLine
129 | status: test
130 | logsource:
131 | category: process_creation
132 | product: windows
133 | detection:
134 | sel:
135 | CommandLine|contains: mimikatz.exe
136 | condition: sel
137 | """)
138 | # Create backend, which automatically adds the pipeline
139 | kusto_backend = KustoBackend()
140 |
141 | # Or apply the pipeline manually
142 | pipeline = microsoft_xdr_pipeline()
143 | pipeline.apply(sigma_rule)
144 |
145 | # Convert the rule
146 | print(sigma_rule.title + " KQL Query: \n")
147 | print(kusto_backend.convert_rule(sigma_rule)[0])
148 | ```
149 |
150 | Output:
151 |
152 | ```text
153 | Mimikatz CommandLine KQL Query:
154 |
155 | DeviceProcessEvents
156 | | where ProcessCommandLine contains "mimikatz.exe"
157 | ```
158 |
159 | ## 🛠️ Advanced Features
160 |
161 | ### 🔄 Pipeline & Backend Args (New in 0.2.0)
162 |
163 | For the `microsoft_xdr_pipeline`:
164 |
165 | - `transform_parent_image`: Controls ParentImage field mapping behavior
166 | - When set to `True` (default), maps ParentImage to InitiatingProcessParentFileName
167 | - When set to `False`, maps ParentImage to InitiatingProcessFileName
168 | - Useful for adjusting field mappings based on specific rule requirements
169 | - Example usage:
170 |
171 | ```python
172 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline
173 | pipeline = microsoft_xdr_pipeline(transform_parent_image=False)
174 | ```
175 |
176 | This argument allows fine-tuning of the ParentImage field mapping, which can be crucial for accurate rule conversion in certain scenarios. By default, it follows the behavior of mapping ParentImage to the parent process name, but setting it to `False` allows for mapping to the initiating process name instead.
177 |
178 | ### 🗃️ Custom Table Names (New in 0.3.0) (Beta)
179 |
180 | The `query_table` argument allows users to override table mappings and set custom table names. This is useful for converting Sigma rules where the rule category does not easily map to the default table names.
181 |
182 | #### YAML Pipelines
183 |
184 | To set a custom table name, ensure your pipeline has a priority of 9 or lower, as sigma-cli merges pipelines based on priority (default is 10). Field mappings in `mappings.py` will apply according to your specified table name, along with any custom field mapping transformations.
185 |
186 | ```YAML
187 | # test_table_name_pipeline.yml
188 | name: Custom Query Table Pipeline
189 | priority: 1
190 | transformations:
191 | - id: test_name_name
192 | type: set_state
193 | key: "query_table"
194 | val: ["DeviceProcessEvents"]
195 | ```
196 |
197 | ```bash
198 | sigma convert -t kusto -p microsoft_xdr -p test_table_name_pipeline.yml test_rule.yml
199 | ```
200 |
201 | #### Python Pipelines
202 |
203 | You can also set the table name in the pipeline via Python by passing the `query_table` parameter to the pipeline.
204 |
205 | ```python
206 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline
207 | my_pipeline = microsoft_xdr_pipeline(query_table="DeviceProcessEvents")
208 | ```
209 |
210 | ## 🔄 Processing Pipelines
211 |
212 | This project includes three main processing pipelines, each designed for a specific Microsoft product:
213 |
214 | 1. **Microsoft XDR Pipeline** (formerly Microsoft 365 Defender)
215 | - Status: Production-ready
216 | - Supports a wide range of Sigma rule categories
217 | - All tables supported, but additional field mapping contributions welcome
218 |
219 | 2. **Sentinel ASIM Pipeline**
220 | - Status: Beta
221 | - Transforms rules for Microsoft Sentinel Advanced Security Information Model (ASIM)
222 | - All tables supported, but field mappings are limited
223 |
224 | 3. **Azure Monitor Pipeline**
225 | - Status: Alpha
226 | - Currently supports field mappings for `SecurityEvent` and `SigninLogs` tables only
227 | - All tables supported, but requires custom field mappings for other tables
228 |
229 | Each pipeline includes a `query_table` parameter for setting custom table names.
230 |
231 | ### 📊 Rule Support
232 |
233 | Rules are supported if either:
234 |
235 | - A valid table name is supplied via the `query_table` parameter or YAML pipeline
236 | - The rule's logsource category is supported and mapped in the pipeline's `mappings.py` file
237 | - The rule has an `EventID` or `EventCode` field in the `detection` section, and the eventid is present in the pipeline's `eventid_to_table_mappings` dictionary
238 |
239 | ### 🖥️ Commonly Supported Categories
240 |
241 | - process_creation
242 | - image_load
243 | - network_connection
244 | - file_access, file_change, file_delete, file_event, file_rename
245 | - registry_add, registry_delete, registry_event, registry_set
246 |
247 | Specific pipelines may support additional categories. Check each pipeline's `mappings.py` file for details.
248 |
249 | ## 🧪 Custom Transformations
250 |
251 | This package includes several custom `ProcessingPipeline` `Transformation` classes:
252 |
253 | 1. **DynamicFieldMappingTransformation**
254 | - Determines field mappings based on the `query_table` state parameter
255 |
256 | 2. **GenericFieldMappingTransformation**
257 | - Applies common field mappings across all tables in a pipeline
258 |
259 | 3. **BaseHashesValuesTransformation**
260 | - Transforms the Hashes field, removing hash algorithm prefixes
261 |
262 | 4. **ParentImageValueTransformation**
263 | - Extracts parent process name from Sysmon ParentImage field
264 |
265 | 5. **SplitDomainUserTransformation**
266 | - Splits User field into separate domain and username fields
267 |
268 | 6. **RegistryActionTypeValueTransformation**
269 | - Adjusts registry ActionType values for compatibility
270 |
271 | 7. **InvalidFieldTransformation**
272 | - Identifies unsupported or invalid fields in rules
273 |
274 | 8. **SetQueryTableStateTransformation**
275 | - Manages the `query_table` state based on rule category or custom settings
276 |
277 | ### 📊 Custom Postprocessing Item
278 |
279 | 1. **PrependQueryTablePostprocessingItem**
280 |
281 | - Adds table name as prefix to each query in a SigmaCollection, or single query in a SigmaRule
282 |
283 | ## ❓Frequently Asked Questions
284 |
285 | ### How do I set the table name for a rule?
286 |
287 | You can set the table name for a rule by adding the `query_table` parameter to the pipeline and setting it to the table name you want to use.
288 |
289 | ```python
290 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline
291 | pipeline = microsoft_xdr_pipeline(query_table="DeviceProcessEvents")
292 | ```
293 |
294 | ### How do I set the table name for a rule in YAML?
295 |
296 | You can set the table name for a rule in YAML by adding the `query_table` parameter to the pipeline and setting it to the table name you want to use.
297 |
298 | ```YAML
299 | # test_table_name_pipeline.yml
300 | name:
301 | priority: 1
302 | transformations:
303 | - id: test_name_name
304 | type: set_state
305 | key: "query_table"
306 | val: ["DeviceProcessEvents"]
307 | ```
308 |
309 | ```bash
310 | sigma convert -t kusto -p microsoft_xdr -p test_table_name_pipeline.yml test_rule.yml
311 | ```
312 |
313 | ### How is the table name determined for a rule?
314 |
315 | The table name is set by the `SetQueryTableStateTransformation` transformation, which is the first transformation in each pipeline. The `query_table` is set to the pipeline's `state` parameter with the following priority:
316 | 1. The `query_table` parameter passed to the pipeline, if using a Python script/code.
317 | 2. The `query_table` parameter passed to the pipeline in a custom YAML pipeline, if using sigma-cli.
318 | 3. The `logsource.category` field in the rule, if the category is present in the pipeline's `category_to_table_mappings` dictionary.
319 | 4. The `EventID` or `EventCode` field, if present in the rule's `detection` section, and if the eventid is present in the pipeline's `eventid_to_table_mappings` dictionary.
320 | 5. If none of the above are present, an error is raised.
321 |
322 | ### How are field mappings determined for a rule?
323 |
324 | The field mappings are determined by the `DynamicFieldMappingTransformation` transformation. It will use the table name from the pipeline state's `query_table` key. The field mapping logic is defined in each pipeline's `mappings.py` file for each table. If a field is not found in the table, the `GenericFieldMappingTransformation` will apply generic field mappings. If a field is not found in the generic field mappings, the field will be kept the same.
325 |
326 | ### What tables are supported for each pipeline?
327 |
328 | The tables that are supported for each pipeline are defined in each pipeline's `tables.py` file. This file is automatically generated by the scripts in the `utils` folder. These scripts pull documentation from Microsoft to get all documented tables and their fields and schema.
329 |
330 | ### I am receiving an `Invalid SigmaDetectionItem field name encountered` error. What does this mean?
331 |
332 | This error means that the field name(s) provided in the error are not found in the table's fields defined in `tables.py` for the pipeline you are using. This probably means that a Sigma rule's field was not found in the field mappings for the table. To fix this error, you can supply your own custom field mappings to convert the unsupported field into a supported one. For example, using YAML:
333 |
334 | ```YAML
335 | # custom_field_mapping_pipeline.yml
336 | name: Custom Field Mapping
337 | priority: 1
338 | transformations:
339 | - id: field_mapping
340 | type: field_name_mapping
341 | mapping:
342 | MyNotSupportedField: a_supported_field
343 | rule_conditions:
344 | - type: logsource
345 | service: sysmon
346 | ```
347 |
348 | ```bash
349 | sigma convert -t kusto -p custom_field_mapping_pipeline.yml -p microsoft_xdr test_rule.yml
350 | ```
351 |
352 | If you find the field mapping useful, please consider submitting a PR to add it to the pipeline's field mappings :)
353 |
354 | ### My query_table or custom field mapping isn't working
355 |
356 | Each pipeline in the project has a priority of 10. If you are trying to set the table name or custom field mappings, your pipeline needs to have a priority of 9 or less. You can set the priority in the YAML pipeline like so:
357 |
358 | ```YAML
359 | # test_table_name_pipeline.yml
360 | name:
361 | priority: 9
362 | transformations:
363 | - id: test_name_name
364 | type: set_state
365 | key: "query_table"
366 | val: ["DeviceProcessEvents"]
367 | ```
368 |
369 | ## 🤝 Contributing
370 |
371 | Contributions are welcome, especially for table and field mappings! Please feel free to submit a Pull Request.
372 |
373 | 1. Fork the repository
374 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
375 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
376 | 4. Push to the branch (`git push origin feature/AmazingFeature`)
377 | 5. Open a Pull Request
378 |
379 | Please make sure to update tests as appropriate.
380 |
381 | ## 📄 License
382 |
383 | This project is licensed under the GNU Lesser General Public License v3.0 - see the [LICENSE](LICENSE) file for details.
384 |
--------------------------------------------------------------------------------
/print-coverage.py:
--------------------------------------------------------------------------------
# Prints code testing coverage as percentage for badge generation.
#
# Reads the Cobertura-style XML report produced by pytest-cov (cov.xml)
# and emits KEY=VALUE lines intended to be appended to $GITHUB_ENV so a
# later workflow step can render a coverage badge.
import xml.etree.ElementTree as et


def coverage_color(coverage: float) -> str:
    """Return the badge color name for a coverage percentage.

    Thresholds: >=95 green, >=90 yellow, >=85 orange, otherwise red.
    """
    if coverage >= 95.0:
        return "green"
    if coverage >= 90.0:
        return "yellow"
    if coverage >= 85.0:
        return "orange"
    return "red"


def main() -> None:
    """Parse cov.xml and print COVERAGE and COVERAGE_COLOR lines."""
    tree = et.parse("cov.xml")
    root = tree.getroot()
    # Cobertura reports line coverage as a 0..1 ratio in the root's
    # "line-rate" attribute; convert to a percentage.
    coverage = float(root.attrib["line-rate"]) * 100
    # ":3.4" keeps the original output format (4 significant digits).
    print(f"COVERAGE={coverage:3.4}%")
    print(f"COVERAGE_COLOR={coverage_color(coverage)}")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "pySigma-backend-kusto"
3 | version = "0.4.3"
4 | description = "pySigma Kusto backend"
5 | authors = ["Stephen Lincoln "]
6 | license = "LGPL-3.0-only"
7 | repository = "https://github.com/AttackIQ/pySigma-backend-kusto"
8 | packages = [
9 | { include = "sigma" }
10 | ]
11 |
12 | [tool.poetry.dependencies]
13 | python = "^3.8"
14 | pysigma = ">= 0.10.0"
15 | certifi = ">=2023.07.22"
16 |
17 | [tool.poetry.group.dev.dependencies]
18 | pytest = "^7.2.1"
19 | pytest-cov = "^4.0.0"
20 | coverage = "^7.2.1"
21 | requests = "^2.32.3"
22 | python-dotenv = "^1.0.1"
23 | beautifulsoup4 = "^4.12.3"
24 | ruff = "^0.6.9"
25 | black = "^24.8.0"
26 |
27 | [build-system]
28 | requires = ["poetry-core>=1.0.0"]
29 | build-backend = "poetry.core.masonry.api"
30 |
31 | [tool.pytest.ini_options]
32 | addopts = "-v --cov=sigma --cov-report=term-missing"
33 | testpaths = ["tests"]
34 |
35 | [tool.coverage.run]
36 | branch = true
37 | source = ["sigma"]
38 | omit = [
39 | # omit anything in a tests directory
40 | "tests/*",
41 | ]
42 |
43 | [tool.coverage.report]
44 | exclude_lines = [
45 | "pragma: no cover",
46 | "def __repr__",
47 | "raise NotImplementedError",
48 | "if __name__ == .__main__.:",
49 | "pass",
50 | "raise ImportError",
51 | ]
52 | show_missing = true
53 | fail_under = 80
54 |
55 | [tool.black]
56 | line-length = 120
57 | target-version = ['py311']
58 |
59 | [tool.ruff]
60 | line-length = 120 # Same as Black
61 | # Assume Python 3.11
62 | target-version = "py311"
63 | # Exclude a variety of commonly ignored directories.
64 | exclude = [
65 | ".bzr",
66 | ".direnv",
67 | ".eggs",
68 | ".git",
69 | ".git-rewrite",
70 | ".hg",
71 | ".ipynb_checkpoints",
72 | ".mypy_cache",
73 | ".nox",
74 | ".pants.d",
75 | ".pyenv",
76 | ".pytest_cache",
77 | ".pytype",
78 | ".ruff_cache",
79 | ".svn",
80 | ".tox",
81 | ".venv",
82 | ".vscode",
83 | "__pypackages__",
84 | "_build",
85 | "buck-out",
86 | "build",
87 | "dist",
88 | "node_modules",
89 | "site-packages",
90 | "venv",
91 | ".venv"
92 | ]
93 |
94 | [tool.ruff.lint]
95 | # Enable isort rules
96 | select = ["I"]
97 |
98 | # Sort imports
99 | [tool.ruff.lint.isort]
100 | combine-as-imports = true
101 | case-sensitive = false
102 |
--------------------------------------------------------------------------------
/sigma/backends/kusto/__init__.py:
--------------------------------------------------------------------------------
from .kusto import KustoBackend

# Mapping between backend identifiers and classes. This is used by the pySigma
# plugin system to recognize backends and expose them under their identifier.
backends = {
    "kusto": KustoBackend,
}
8 |
--------------------------------------------------------------------------------
/sigma/backends/kusto/kusto.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import ClassVar, Dict, Pattern, Tuple, Type, Union
3 |
4 | from sigma.conditions import (
5 | ConditionAND,
6 | ConditionFieldEqualsValueExpression,
7 | ConditionItem,
8 | ConditionNOT,
9 | ConditionOR,
10 | )
11 | from sigma.conversion.base import TextQueryBackend
12 | from sigma.conversion.deferred import DeferredQueryExpression
13 | from sigma.conversion.state import ConversionState
14 | from sigma.types import SigmaCompareExpression, SigmaNumber, SigmaString, SpecialChars
15 |
16 |
class KustoBackend(TextQueryBackend):
    """Generic Kusto Query Language (KQL) backend.

    Converts Sigma rules into KQL search strings. Conversion behavior is driven
    almost entirely by the ClassVar configuration consumed by pySigma's
    ``TextQueryBackend``; only a few methods are overridden below for
    KQL-specific quirks: case-insensitive ``=~`` vs numeric ``==``, wildcard
    handling inside in-expressions, and parenthesizing of ``not``.
    """

    # The backend generates grouping if required
    name: ClassVar[str] = "Kusto backend"
    identifier: ClassVar[str] = "kusto"
    formats: Dict[str, str] = {
        "default": "Kusto Query Language search strings",
    }

    requires_pipeline: bool = False  # NOTE(review): original comment said "m365 pipeline is automatically applied" — pipelines live under sigma/pipelines and are not applied here; confirm intent.

    # Operator precedence
    parenthesize = True
    precedence: ClassVar[Tuple[Type[ConditionItem], Type[ConditionItem], Type[ConditionItem]]] = (
        ConditionNOT,
        ConditionAND,
        ConditionOR,
    )
    group_expression: ClassVar[str] = (
        "({expr})"  # Expression for precedence override grouping as format string with {expr} placeholder
    )
    # Generated query tokens
    token_separator: str = " "  # separator inserted between all boolean operators
    or_token: ClassVar[str] = "or"
    and_token: ClassVar[str] = "and"
    not_token: ClassVar[str] = "not"
    eq_token: ClassVar[str] = " =~ "  # Token inserted between field and value (without separator); =~ is KQL's case-insensitive equals

    # String output
    ## Fields
    ### Quoting
    field_quote: ClassVar[str] = (
        "'"  # Character used to quote field characters if field_quote_pattern matches (or not, depending on field_quote_pattern_negation). No field name quoting is done if not set.
    )
    field_quote_pattern: ClassVar[Pattern] = re.compile(
        "^\\w+$"
    )  # Quote field names if this pattern (doesn't) matches, depending on field_quote_pattern_negation. Field name is always quoted if pattern is not set.
    field_quote_pattern_negation: ClassVar[bool] = (
        True  # Negate field_quote_pattern result: with True, only field names that are NOT purely \w+ get quoted.
    )

    ### Escaping
    field_escape: ClassVar[str] = ""  # Character to escape particular parts defined in field_escape_pattern.
    field_escape_quote: ClassVar[bool] = True  # Escape quote string defined in field_quote
    field_escape_pattern: ClassVar[Pattern] = re.compile(
        "\\s"
    )  # All matches of this pattern are prepended with the string contained in field_escape.

    ## Values
    str_quote: ClassVar[str] = '"'  # string quoting character (added as escaping character)
    escape_char: ClassVar[str] = "\\"  # Escaping character for special characters inside string
    wildcard_multi: ClassVar[str] = "*"  # Character used as multi-character wildcard
    wildcard_single: ClassVar[str] = "*"  # Character used as single-character wildcard (KQL has no dedicated single-char wildcard, so both map to *)
    add_escaped: ClassVar[str] = "\\"  # Characters quoted in addition to wildcards and string quote
    filter_chars: ClassVar[str] = ""  # Characters filtered
    bool_values: ClassVar[Dict[bool, str]] = {  # Values to which boolean values are mapped.
        True: "true",
        False: "false",
    }

    # String matching operators. if none is appropriate eq_token is used.
    startswith_expression: ClassVar[str] = "{field} startswith {value}"
    endswith_expression: ClassVar[str] = "{field} endswith {value}"
    contains_expression: ClassVar[str] = "{field} contains {value}"
    wildcard_match_expression: ClassVar[Union[str, None]] = (
        None  # Special expression if wildcards can't be matched with the eq_token operator
    )

    # Regular expressions
    re_expression: ClassVar[str] = (
        '{field} matches regex "{regex}"'  # Regular expression query as format string with placeholders {field} and {regex}
    )
    re_escape_char: ClassVar[str] = "\\"  # Character used for escaping in regular expressions
    re_escape: ClassVar[Tuple[str, ...]] = ()  # List of strings that are escaped
    re_escape_escape_char: bool = True  # If True, the escape character is also escaped

    # cidr expressions
    cidr_wildcard: ClassVar[str] = "*"  # Character used as single wildcard
    cidr_expression: ClassVar[str] = (
        'ipv4_is_in_range({field}, "{value}")'  # CIDR expression query as format string with placeholders {field} = {value}
    )
    cidr_in_list_expression: ClassVar[str] = (
        'ipv4_is_in_any_range({field}, "{value}")'  # CIDR expression query as format string with placeholders {field} = in({list})
    )

    # Numeric comparison operators
    compare_op_expression: ClassVar[str] = (
        "{field} {operator} {value}"  # Compare operation query as format string with placeholders {field}, {operator} and {value}
    )
    # Mapping between CompareOperators elements and strings used as replacement for {operator} in compare_op_expression
    compare_operators: ClassVar[Dict[SigmaCompareExpression.CompareOperators, str]] = {
        SigmaCompareExpression.CompareOperators.LT: "<",
        SigmaCompareExpression.CompareOperators.LTE: "<=",
        SigmaCompareExpression.CompareOperators.GT: ">",
        SigmaCompareExpression.CompareOperators.GTE: ">=",
    }

    # Null/None expressions
    field_null_expression: ClassVar[str] = (
        "isnull({field})"  # Expression for field has null value as format string with {field} placeholder for field name
    )

    # Field value in list, e.g. "field in (value list)" or "field containsall (value list)"
    convert_or_as_in: ClassVar[bool] = True  # Convert OR as in-expression
    convert_and_as_in: ClassVar[bool] = True  # Convert AND as in-expression
    in_expressions_allow_wildcards: ClassVar[bool] = (
        True  # Values in list can contain wildcards. If set to False (default) only plain values are converted into in-expressions.
    )
    field_in_list_expression: ClassVar[str] = (
        "{field} {op} ({list})"  # Expression for field in list of values as format string with placeholders {field}, {op} and {list}
    )
    or_in_operator: ClassVar[str] = (
        "in~"  # Operator used to convert OR into in-expressions. Must be set if convert_or_as_in is set
    )
    and_in_operator: ClassVar[str] = (
        "has_all"  # Operator used to convert AND into in-expressions. Must be set if convert_and_as_in is set
    )
    list_separator: ClassVar[str] = ", "  # List element separator

    # Value not bound to a field
    unbound_value_str_expression: ClassVar[str] = (
        "{value}"  # Expression for string value not bound to a field as format string with placeholder {value}
    )
    unbound_value_num_expression: ClassVar[str] = (
        "{value}"  # Expression for number value not bound to a field as format string with placeholder {value}
    )
    unbound_value_re_expression: ClassVar[str] = (
        "_=~{value}"  # Expression for regular expression not bound to a field as format string with placeholder {value}
    )

    # Query finalization: appending and concatenating deferred query part
    deferred_start: ClassVar[str] = "\n| "  # String used as separator between main query and deferred parts
    deferred_separator: ClassVar[str] = "\n| "  # String used to join multiple deferred query parts
    deferred_only_query: ClassVar[str] = "*"  # String used as query if final query only contains deferred expression

    # We use =~ for eq_token so everything is case insensitive. But this cannot be used with ints/numbers in queries
    # So we can define a new token to use for SigmaNumeric types and override convert_condition_field_eq_val_num
    # to use it
    num_eq_token: ClassVar[str] = " == "

    # Override methods

    # For numeric values, need == instead of =~
    def convert_condition_field_eq_val_num(
        self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
    ) -> Union[str, DeferredQueryExpression]:
        """Conversion of field = number value expressions.

        Uses num_eq_token (==) instead of eq_token (=~), because KQL's =~
        operator is string-only.
        """
        try:
            return self.escape_and_quote_field(cond.field) + self.num_eq_token + str(cond.value)
        except TypeError:  # pragma: no cover
            raise NotImplementedError("Field equals numeric value expressions are not supported by the backend.")

    def convert_condition_as_in_expression(
        self, cond: Union[ConditionOR, ConditionAND], state: ConversionState
    ) -> Union[str, DeferredQueryExpression]:
        """Overridden method for conversion of field in value list conditions.
        KQL doesn't really use wildcards, so if we have an 'as_in' condition where one or more of the values has a wildcard,
        we can still use the as_in condition, then append on the wildcard value(s) with a startswith, endswith, or contains
        expression
        """

        field = self.escape_and_quote_field(cond.args[0].field)  # type: ignore
        # OR lists use in~ joined by 'or'; AND lists use has_all joined by 'and'.
        op1 = self.or_in_operator if isinstance(cond, ConditionOR) else self.and_in_operator
        op2 = self.or_token if isinstance(cond, ConditionOR) else self.and_token
        # Plain strings and numbers go into the in~/has_all list ...
        list_nonwildcard = self.list_separator.join(
            [
                self.convert_value_str(arg.value, state)
                for arg in cond.args
                if isinstance(arg, ConditionFieldEqualsValueExpression)
                and (
                    (isinstance(arg.value, SigmaString) and not arg.value.contains_special())
                    or (isinstance(arg.value, SigmaNumber))
                )
            ]
        )
        # ... while values containing wildcards are handled separately below.
        list_wildcards = [
            arg.value
            for arg in cond.args
            if isinstance(arg, ConditionFieldEqualsValueExpression)
            and isinstance(arg.value, SigmaString)
            and arg.value.contains_special()
        ]
        as_in_expr = ""
        # Convert as_in and wildcard values separately
        if list_nonwildcard:
            as_in_expr = self.field_in_list_expression.format(field=field, op=op1, list=list_nonwildcard)
        wildcard_exprs_list = []
        if list_wildcards:
            for arg in list_wildcards:
                # NOTE(review): `field` is already escaped/quoted here but is fed back
                # into a new condition whose conversion escapes it again — harmless for
                # plain \w+ names (never quoted), but verify for names that need quoting.
                new_cond = ConditionFieldEqualsValueExpression(field=field, value=arg)
                if arg[1:-1].contains_special():  # Wildcard in string, not at start or end.
                    # We need to get rid of all wildcards, and create a 'and contains' for each element in the list
                    expr = f"{self.token_separator}{self.and_token}{self.token_separator}".join(
                        [
                            self.contains_expression.format(
                                field=field, value=self.convert_value_str(SigmaString(str(x)), state)
                            )
                            for x in arg.s
                            if not isinstance(x, SpecialChars)
                        ]
                    )
                    expr = self.group_expression.format(expr=expr)
                else:
                    # Leading/trailing wildcard only: startswith/endswith/contains applies.
                    expr = self.convert_condition_field_eq_val_str(new_cond, state)
                wildcard_exprs_list.append(expr)
        wildcard_exprs = f"{self.token_separator}{op2}{self.token_separator}".join(wildcard_exprs_list)
        # Join the in-list part and the wildcard part (either may be empty).
        if as_in_expr and wildcard_exprs:
            return as_in_expr + self.token_separator + op2 + self.token_separator + wildcard_exprs
        return as_in_expr + wildcard_exprs

    def convert_condition_not(self, cond: ConditionNOT, state: ConversionState) -> Union[str, DeferredQueryExpression]:
        """Conversion of NOT conditions. Overridden to surround the group or expr of the 'not' negation with parens,
        as expected by KQL.
        """
        arg = cond.args[0]
        try:
            if arg.__class__ in self.precedence:  # group if AND or OR condition is negated
                return self.not_token + "(" + str(self.convert_condition_group(arg, state)) + ")"  # type: ignore
            else:
                expr = self.convert_condition(arg, state)  # type: ignore
                if isinstance(expr, DeferredQueryExpression):  # negate deferred expression and pass it to parent
                    return expr.negate()
                else:  # convert negated expression to string
                    return self.not_token + "(" + expr + ")"
        except TypeError:  # pragma: no cover
            raise NotImplementedError("Operator 'not' not supported by the backend")

    def convert_value_str(self, s: Union[SigmaString, SigmaNumber], state: ConversionState) -> str:
        """Convert a SigmaString into a plain string which can be used in query.

        Accepts SigmaNumber too (coerced to SigmaString) so number values can
        appear inside in~ lists.
        """
        if not isinstance(s, SigmaString):
            s = SigmaString(str(s))
        converted = super().convert_value_str(s, state)
        # If we have a wildcard in a string, we need to un-escape it
        # See issue #13
        return re.sub(r"\\\*", r"*", converted)
253 |
--------------------------------------------------------------------------------
/sigma/backends/microsoft365defender/__init__.py:
--------------------------------------------------------------------------------
from ..kusto.kusto import KustoBackend

# Mapping between backend identifiers and classes, consumed by the pySigma
# plugin system. The legacy "microsoft365defender" identifier is kept as an
# alias of the generic Kusto backend for backward compatibility.
backends = {
    "microsoft365defender": KustoBackend,
}
8 |
--------------------------------------------------------------------------------
/sigma/backends/microsoft365defender/microsoft365defender.py:
--------------------------------------------------------------------------------
1 | from typing import ClassVar, Dict
2 |
3 | from sigma.backends.kusto.kusto import KustoBackend
4 |
5 |
class Microsoft365DefenderBackend(KustoBackend):
    """Deprecated Microsoft 365 Defender backend.

    Backward-compatibility alias of :class:`KustoBackend`: it overrides only the
    plugin metadata (name, identifier, format description) and inherits all
    conversion behavior unchanged.
    """

    name: ClassVar[str] = "[DEPRECATED] Microsoft 365 Defender Backend"
    identifier: ClassVar[str] = "microsoft365defender"
    formats: ClassVar[Dict[str, str]] = {
        "default": "Microsoft 365 Defender Kusto Query Language search strings",
    }
14 |
--------------------------------------------------------------------------------
/sigma/pipelines/azuremonitor/__init__.py:
--------------------------------------------------------------------------------
from .azuremonitor import azure_monitor_pipeline

# Mapping between pipeline identifiers and pipeline factory functions,
# consumed by the pySigma plugin system.
pipelines = {
    "azure_monitor": azure_monitor_pipeline,
}
6 |
--------------------------------------------------------------------------------
/sigma/pipelines/azuremonitor/azuremonitor.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sigma.pipelines.kusto_common.postprocessing import (
4 | PrependQueryTablePostprocessingItem,
5 | )
6 | from sigma.processing.conditions import (
7 | ExcludeFieldCondition,
8 | IncludeFieldCondition,
9 | LogsourceCondition,
10 | RuleProcessingItemAppliedCondition,
11 | RuleProcessingStateCondition,
12 | )
13 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline
14 | from sigma.processing.transformations import (
15 | DropDetectionItemTransformation,
16 | ReplaceStringTransformation,
17 | RuleFailureTransformation,
18 | )
19 |
20 | from ..kusto_common.errors import InvalidFieldTransformation
21 | from ..kusto_common.schema import create_schema
22 | from ..kusto_common.transformations import (
23 | DynamicFieldMappingTransformation,
24 | RegistryActionTypeValueTransformation,
25 | SetQueryTableStateTransformation,
26 | )
27 | from .mappings import (
28 | AZURE_MONITOR_FIELD_MAPPINGS,
29 | CATEGORY_TO_TABLE_MAPPINGS,
30 | EVENTID_CATEGORY_TO_TABLE_MAPPINGS,
31 | )
32 | from .schema import AzureMonitorSchema
33 | from .tables import AZURE_MONITOR_TABLES
34 | from .transformations import (
35 | DefaultHashesValuesTransformation,
36 | SecurityEventHashesValuesTransformation,
37 | )
38 |
# Schema object (table -> field metadata) used below to validate field names.
AZURE_MONITOR_SCHEMA = create_schema(AzureMonitorSchema, AZURE_MONITOR_TABLES)

# Drop ObjectType fields
# NOTE(review): ObjectType detection items are removed entirely, even though the
# SecurityEvent table mapping in mappings.py also maps "ObjectType" — confirm
# the drop is intended to take precedence.
drop_fields_proc_item = ProcessingItem(
    identifier="azure_monitor_drop_fields",
    transformation=DropDetectionItemTransformation(),
    field_name_conditions=[IncludeFieldCondition(["ObjectType"])],
)
47 |
## Fieldmappings
# Renames Sigma field names to the target table's column names, based on the
# query_table selected in pipeline state.
fieldmappings_proc_item = ProcessingItem(
    identifier="azure_monitor_table_fieldmappings",
    transformation=DynamicFieldMappingTransformation(AZURE_MONITOR_FIELD_MAPPINGS),
)

## Generic Field Mappings, keep this last
## Exclude any fields already mapped, e.g. if a table mapping has been applied.
# This will fix the case where ProcessId is usually mapped to InitiatingProcessId, EXCEPT for the DeviceProcessEvent table where it stays as ProcessId.
# So we can map ProcessId to ProcessId in the DeviceProcessEvents table mapping, and prevent the generic mapping to InitiatingProcessId from being applied
# by adding a detection item condition that the table field mappings have been applied
# NOTE(review): intentionally disabled; generic_mappings is empty for this
# pipeline (see mappings.py), so there is currently nothing to apply.

# generic_field_mappings_proc_item = ProcessingItem(
#     identifier="azure_monitor_generic_fieldmappings",
#     transformation=GenericFieldMappingTransformation(AZURE_MONITOR_FIELD_MAPPINGS),
#     detection_item_conditions=[DetectionItemProcessingItemAppliedCondition("azure_monitor_table_fieldmappings")],
#     detection_item_condition_linking=any,
#     detection_item_condition_negation=True,
# )
67 |
# Field names whose values may contain Sysmon-style registry hive abbreviations.
REGISTRY_FIELDS = [
    "RegistryKey",
    "RegistryPreviousKey",
    "ObjectName",
]

## Field Value Replacements ProcessingItems
replacement_proc_items = [
    # Sysmon uses abbreviations in RegistryKey values, replace with full key names as the DeviceRegistryEvents schema
    # expects them to be
    # Note: Ensure this comes AFTER field mapping renames, as we're specifying DeviceRegistryEvent fields
    #
    # Do this one first, or else the HKLM only one will replace HKLM and mess up the regex
    # NOTE(review): the replacement value "CurrentControlSet001" is unusual — on-disk
    # control sets are named ControlSet001; confirm this matches the table's data.
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_currentcontrolset",
        transformation=ReplaceStringTransformation(
            regex=r"(?i)(^HKLM\\SYSTEM\\CurrentControlSet)",
            replacement=r"HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet001",
        ),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hklm",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKLM)", replacement=r"HKEY_LOCAL_MACHINE"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hku",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKU)", replacement=r"HKEY_USERS"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    # NOTE(review): HKCR normally expands to HKEY_CLASSES_ROOT; verify that
    # HKEY_LOCAL_MACHINE\CLASSES is the form the target table stores.
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hkcr",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_actiontype_value",
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["EventType"])],
    ),
    # Processing item to transform the Hashes field in the SecurityEvent table to get rid of the hash algorithm prefix in each value
    ProcessingItem(
        identifier="azure_monitor_securityevent_hashes_field_values",
        transformation=SecurityEventHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["FileHash"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "SecurityEvent")],
    ),
    # Same idea for every other table (rule_condition_negation inverts the
    # SecurityEvent state condition above).
    ProcessingItem(
        identifier="azure_monitor_hashes_field_values",
        transformation=DefaultHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "SecurityEvent")],
        rule_condition_negation=True,
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="azure_monitor_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]
131 |
# Exceptions/Errors ProcessingItems
# Catch-all for when the query table is not set, meaning the rule could not be mapped to a table or the table name was not set
rule_error_proc_items = [
    # Category Not Supported or Query Table Not Set
    # Fires only after the set_query_table item has run AND left query_table as None.
    ProcessingItem(
        identifier="azure_monitor_unsupported_rule_category_or_missing_query_table",
        transformation=RuleFailureTransformation(
            "Rule category not yet supported by the Azure Monitor pipeline or query_table is not set."
        ),
        rule_conditions=[
            RuleProcessingItemAppliedCondition("azure_monitor_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
]
148 |
149 |
def get_valid_fields(table_name):
    """Return every field name accepted for *table_name*.

    The result is the union (as an ordered list, duplicates allowed) of the
    table's schema columns, its table-specific field mappings, the pipeline's
    generic field mappings, and the synthetic "Hashes" field handled by the
    hash-value transformations.
    """
    schema_fields = list(AZURE_MONITOR_SCHEMA.tables[table_name].fields.keys())
    table_mapped_fields = list(AZURE_MONITOR_FIELD_MAPPINGS.table_mappings.get(table_name, {}).keys())
    generic_fields = list(AZURE_MONITOR_FIELD_MAPPINGS.generic_mappings.keys())
    return schema_fields + table_mapped_fields + generic_fields + ["Hashes"]
157 |
158 |
# One InvalidFieldTransformation per known table: fails conversion when a rule
# uses a field that is neither in the table schema nor covered by a mapping.
field_error_proc_items = []

for table_name in AZURE_MONITOR_SCHEMA.tables.keys():
    valid_fields = get_valid_fields(table_name)

    field_error_proc_items.append(
        ProcessingItem(
            identifier=f"azure_monitor_unsupported_fields_{table_name}",
            transformation=InvalidFieldTransformation(
                f"Please use valid fields for the {table_name} table, or the following fields that have fieldmappings in this "
                f"pipeline:\n{', '.join(sorted(set(valid_fields)))}"
            ),
            field_name_conditions=[ExcludeFieldCondition(fields=valid_fields)],
            rule_conditions=[
                RuleProcessingItemAppliedCondition("azure_monitor_set_query_table"),
                RuleProcessingStateCondition("query_table", table_name),
            ],
            rule_condition_linking=all,
        )
    )

# Add a catch-all error for custom table names
# (query_table is None here, i.e. a user-supplied table we have no schema for;
# only generic mappings and "Hashes" can be validated).
field_error_proc_items.append(
    ProcessingItem(
        identifier="azure_monitor_unsupported_fields_custom",
        transformation=InvalidFieldTransformation(
            "Invalid field name for the custom table. Please ensure you're using valid fields for your custom table."
        ),
        field_name_conditions=[
            ExcludeFieldCondition(fields=list(AZURE_MONITOR_FIELD_MAPPINGS.generic_mappings.keys()) + ["Hashes"])
        ],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("azure_monitor_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
)
197 |
198 |
def azure_monitor_pipeline(query_table: Optional[str] = None) -> ProcessingPipeline:
    """Build the processing pipeline mapping generic Sigma log sources to Azure
    Monitor tables and fields for the Kusto Query Language backend.

    :param query_table: If specified, the table name will be used in the finalizer,
        otherwise the table name will be selected based on the category of the rule.
    :type query_table: Optional[str]

    :return: ProcessingPipeline for Microsoft Azure Monitor
    :rtype: ProcessingPipeline
    """
    # Resolve the target table first; downstream items key off "query_table" state.
    set_query_table_item = ProcessingItem(
        identifier="azure_monitor_set_query_table",
        transformation=SetQueryTableStateTransformation(
            query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
        ),
    )

    items = [set_query_table_item, fieldmappings_proc_item, drop_fields_proc_item]
    # generic_field_mappings_proc_item is intentionally disabled (see module comments).
    items += replacement_proc_items
    items += rule_error_proc_items
    items += field_error_proc_items

    return ProcessingPipeline(
        name="Generic Log Sources to Azure Monitor tables and fields",
        priority=10,
        items=items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )
231 |
--------------------------------------------------------------------------------
/sigma/pipelines/azuremonitor/mappings.py:
--------------------------------------------------------------------------------
1 | from sigma.pipelines.common import (
2 | logsource_windows_file_access,
3 | logsource_windows_file_change,
4 | logsource_windows_file_delete,
5 | logsource_windows_file_event,
6 | logsource_windows_file_rename,
7 | logsource_windows_image_load,
8 | logsource_windows_network_connection,
9 | logsource_windows_process_creation,
10 | logsource_windows_registry_add,
11 | logsource_windows_registry_delete,
12 | logsource_windows_registry_event,
13 | logsource_windows_registry_set,
14 | )
15 | from sigma.pipelines.kusto_common.schema import FieldMappings
16 |
17 |
class AzureMonitorFieldMappings(FieldMappings):
    """Azure Monitor field-mapping container; behavior inherited from FieldMappings.

    NOTE(review): a class with the same name is also declared in
    sigma/pipelines/azuremonitor/schema.py (with @dataclass) — consider
    consolidating to a single definition.
    """

    pass
20 |
21 |
# Just map to SecurityEvent for now until we have more mappings for other tables
_SECURITY_EVENT_CATEGORIES = (
    "process_creation",
    "image_load",
    "file_access",
    "file_change",
    "file_delete",
    "file_event",
    "file_rename",
    "registry_add",
    "registry_delete",
    "registry_event",
    "registry_set",
    "network_connection",
)

# Every supported Sigma rule category currently routes to the SecurityEvent table.
CATEGORY_TO_TABLE_MAPPINGS = {category: "SecurityEvent" for category in _SECURITY_EVENT_CATEGORIES}


# Sysmon/EventID-derived categories likewise all route to SecurityEvent for now.
EVENTID_CATEGORY_TO_TABLE_MAPPINGS = {
    eventid_category: "SecurityEvent"
    for eventid_category in ("process", "logon", "registry", "file", "network", "image_load", "pipe", "wmi")
}
49 |
## Rule Categories -> RuleConditions
# Maps each supported rule category to its pySigma logsource condition.
# NOTE(review): defined for parity with the other Kusto pipelines; this module's
# own pipeline routes categories via CATEGORY_TO_TABLE_MAPPINGS instead —
# confirm this mapping is consumed elsewhere.
CATEGORY_TO_CONDITIONS_MAPPINGS = {
    "process_creation": logsource_windows_process_creation(),
    "image_load": logsource_windows_image_load(),
    "file_access": logsource_windows_file_access(),
    "file_change": logsource_windows_file_change(),
    "file_delete": logsource_windows_file_delete(),
    "file_event": logsource_windows_file_event(),
    "file_rename": logsource_windows_file_rename(),
    "registry_add": logsource_windows_registry_add(),
    "registry_delete": logsource_windows_registry_delete(),
    "registry_event": logsource_windows_registry_event(),
    "registry_set": logsource_windows_registry_set(),
    "network_connection": logsource_windows_network_connection(),
}
65 |
66 |
# Per-table Sigma-field -> column-name mappings. Identity entries (e.g.
# "CommandLine": "CommandLine") pin a field to itself so no other mapping
# applies; generic_mappings is intentionally empty for this pipeline.
AZURE_MONITOR_FIELD_MAPPINGS = AzureMonitorFieldMappings(
    table_mappings={
        "SecurityEvent": {
            "CommandLine": "CommandLine",
            "Image": "NewProcessName",
            "ParentImage": "ParentProcessName",
            "User": "SubjectUserName",
            "TargetFilename": "ObjectName",
            "SourceIp": "IpAddress",
            "DestinationIp": "DestinationIp",
            "DestinationPort": "DestinationPort",
            "SourcePort": "SourcePort",
            "SourceHostname": "WorkstationName",
            "DestinationHostname": "DestinationHostname",
            "EventID": "EventID",
            "ProcessId": "NewProcessId",
            "ProcessName": "NewProcessName",
            "LogonType": "LogonType",
            "TargetUserName": "TargetUserName",
            "TargetDomainName": "TargetDomainName",
            "TargetLogonId": "TargetLogonId",
            "Status": "Status",
            "SubStatus": "SubStatus",
            "ObjectType": "ObjectType",
            "ShareName": "ShareName",
            "AccessMask": "AccessMask",
            "ServiceName": "ServiceName",
            "TicketOptions": "TicketOptions",
            "TicketEncryptionType": "TicketEncryptionType",
            "TransmittedServices": "TransmittedServices",
            "WorkstationName": "WorkstationName",
            "LogonProcessName": "LogonProcessName",
            "LogonGuid": "LogonGuid",
            "Category": "EventSourceName",
            "Hashes": "FileHash",
            "TargetObject": "ObjectName",
        },
        "SigninLogs": {
            "User": "UserPrincipalName",
            "TargetUserName": "UserPrincipalName",
            "src_ip": "IPAddress",
            "IpAddress": "IPAddress",
            "app": "AppDisplayName",
            "Application": "AppDisplayName",
            "AuthenticationMethod": "AuthenticationMethodsUsed",
            "Status": "Status",
            "ResultType": "ResultType",
            "ResultDescription": "ResultDescription",
            "UserAgent": "UserAgent",
            "Location": "Location",
            "ClientAppUsed": "ClientAppUsed",
            "DeviceDetail": "DeviceDetail",
            "CorrelationId": "CorrelationId",
            "ConditionalAccessStatus": "ConditionalAccessStatus",
            "RiskLevelAggregated": "RiskLevelAggregated",
            "RiskLevelDuringSignIn": "RiskLevelDuringSignIn",
            "RiskDetail": "RiskDetail",
            "RiskState": "RiskState",
            "MfaDetail": "MfaDetail",
            "NetworkLocationDetails": "NetworkLocationDetails",
            "AuthenticationProtocol": "AuthenticationProtocol",
            "AuthenticationRequirement": "AuthenticationRequirement",
            "SignInIdentifier": "SignInIdentifier",
            "SignInIdentifierType": "SignInIdentifierType",
            "ResourceDisplayName": "ResourceDisplayName",
            "ResourceIdentity": "ResourceIdentity",
            "AppId": "AppId",
            "AuthenticationProcessingDetails": "AuthenticationProcessingDetails",
            "IsInteractive": "IsInteractive",
            "TokenIssuerName": "TokenIssuerName",
            "TokenIssuerType": "TokenIssuerType",
            "UserType": "UserType",
            "IPAddress": "IPAddress",
            "AutonomousSystemNumber": "AutonomousSystemNumber",
        },
    },
    generic_mappings={},
)
145 |
--------------------------------------------------------------------------------
/sigma/pipelines/azuremonitor/schema.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 | from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings
4 |
5 |
@dataclass
class AzureMonitorSchema(BaseSchema):
    """Azure Monitor table schema container; behavior inherited from BaseSchema."""

    pass
9 |
10 |
@dataclass
class AzureMonitorFieldMappings(FieldMappings):
    """Azure Monitor field-mapping container; behavior inherited from FieldMappings.

    NOTE(review): a class with the same name (without @dataclass) is also
    declared in sigma/pipelines/azuremonitor/mappings.py — consider
    consolidating to a single definition.
    """

    pass
14 |
--------------------------------------------------------------------------------
/sigma/pipelines/azuremonitor/transformations.py:
--------------------------------------------------------------------------------
1 | from ..kusto_common.transformations import BaseHashesValuesTransformation
2 |
3 |
class SecurityEventHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the FileHash (originally Hashes) field in the SecurityEvent table,
    stripping the hash algorithm prefix (MD5/SHA1/SHA256) from each value.
    """

    def __init__(self):
        # drop_algo_prefix=True: SecurityEvent stores bare hash values under a
        # single FileHash column, so the algorithm prefix must be removed.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256"], field_prefix="FileHash", drop_algo_prefix=True)
11 |
12 |
class DefaultHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field for Azure Monitor tables other than SecurityEvent
    (see the pipeline's rule_condition_negation), creating a field per hash algorithm.
    """

    def __init__(self):
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256"], field_prefix="")
20 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AttackIQ/pySigma-backend-kusto/6f6d378e22db272a5cffd734ae66b773162ef75d/sigma/pipelines/kusto_common/__init__.py
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/conditions.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Union
3 |
4 | from sigma.correlations import SigmaCorrelationRule
5 | from sigma.processing.conditions import RuleProcessingCondition
6 | from sigma.rule import SigmaRule
7 |
8 |
@dataclass
class QueryTableSetCondition(RuleProcessingCondition):
    """Rule processing condition that matches when the pipeline already has a query table selected."""

    def match(
        self,
        pipeline: "sigma.processing.pipeline.ProcessingPipeline",  # noqa: F821 # type: ignore
        rule: Union[SigmaRule, SigmaCorrelationRule],
    ) -> bool:
        """Return True when the 'query_table' pipeline state key holds a non-None value."""
        query_table = pipeline.state.get("query_table")
        return query_table is not None
18 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/errors.py:
--------------------------------------------------------------------------------
1 | from sigma.processing.transformations import (
2 | DetectionItemFailureTransformation,
3 | SigmaTransformationError,
4 | )
5 | from sigma.rule import SigmaDetectionItem
6 |
7 |
class InvalidFieldTransformation(DetectionItemFailureTransformation):
    """
    Overrides the apply_detection_item() method from DetectionItemFailureTransformation to also include the field name
    in the error message.

    Keyword detection items (no field name) are ignored so keyword searches do not fail.
    """

    def apply_detection_item(self, detection_item: SigmaDetectionItem) -> None:
        """Raise SigmaTransformationError naming the offending field; no-op for field-less (keyword) items."""
        field_name = detection_item.field
        if field_name:  # If no field name is set, don't raise an error because its a keyword
            # Build the message locally instead of mutating self.message: transformation
            # instances are reused across rules, and the old in-place concatenation
            # accumulated a new field-name prefix on every invocation.
            raise SigmaTransformationError(
                f"Invalid SigmaDetectionItem field name encountered: {field_name}. " + self.message
            )
19 |
20 |
class InvalidHashAlgorithmError(Exception):
    """Raised when no supported hash algorithm can be identified in a Hashes field value."""
23 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/finalization.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List
3 |
4 | from sigma.processing.finalization import Finalizer
5 |
6 |
@dataclass
class QueryTableFinalizer(Finalizer):
    """Finalizer for pipelines using the Kusto Query Language to add in the table name as a prefix to the query.

    The query_table is set by the SetQueryTableStateTransformation transformation that is applied to each rule at the very beginning of the pipeline;
    the query table can be supplied as an argument to the pipeline, set in a previous ProcessingPipeline (which is combined into a single pipeline in sigma_cli), or is
    set by the rules category or other criteria from other transformations.

    The standard finalizers append all queries together into a single query string. However, this finalizer
    will keep individual queries separate and add the table name as a prefix to each query.

    A custom table name can be specified in the finalizer, otherwise the table name will be selected based on the processing pipeline's state 'query_table' key.
    """

    # Optional explicit table name override; None means "use the pipeline state".
    # NOTE(review): the annotation should be Optional[str] — the default is None.
    table_names: str = None

    def apply(self, pipeline: "sigma.processing.pipeline.ProcessingPipeline", queries: List[str]) -> List[str]:  # type: ignore # noqa: F821
        """Prefix each query in place as '<Table>\\n| where <query>'; with no table available, emit a cross-table 'search' query."""
        for i, query in enumerate(queries):
            if self.table_names:
                queries[i] = f"{self.table_names}\n| where {query}"
            elif "query_table" in pipeline.state:
                queries[i] = f"{pipeline.state['query_table']}\n| where {query}"
            else:
                # No table known at all: fall back to an unscoped Kusto search.
                queries[i] = f"search {query}"
        return queries
32 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/mappings.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
# Event ID Categories based on Windows Security Events
EVENTID_CATEGORIES = {
    "process": [1, 5, 10, 25, 4688, 4689, 4696],  # Process creation, termination, access, tampering
    "logon": [4624, 4625, 4634, 4647, 4648, 4778, 4779, 4800, 4801, 4802, 4803],  # Logon/logoff events
    "registry": [4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 12, 13, 14],  # Registry operations
    "file": [2, 11, 15, 23, 26, 27, 28, 29, 4656, 4658, 4660, 4663],  # File operations
    "network": [3, 22, 5140, 5145, 5156, 5157, 5158, 5159],  # Network and DNS events
    "image_load": [7],  # Image loaded
    "pipe": [17, 18],  # Pipe events
    "wmi": [19, 20, 21],  # WMI events
    "service": [4697, 4698, 4699, 4700, 4701, 4702],  # Service and scheduled task operations
    "account": [4720, 4722, 4723, 4724, 4725, 4726, 4738, 4740, 4767],  # Account management
}


def get_category_from_eventid(eventid: int) -> Optional[str]:
    """
    Determine the category based on the Event ID.

    Categories are scanned in EVENTID_CATEGORIES insertion order, so an Event ID
    listed under several categories (e.g. 4656) resolves to the first match.
    Returns None when the Event ID is not categorized.
    """
    for category, eventids in EVENTID_CATEGORIES.items():
        if eventid in eventids:
            return category
    return None


def get_table_from_eventid(eventid: int, category_table_mappings: dict) -> str:
    """
    Get the appropriate table name for a given EventID and backend type.

    Returns an empty string when the Event ID has no category or the category
    is absent from the supplied mapping.
    """
    category = get_category_from_eventid(eventid)
    if not category:
        return ""
    return category_table_mappings.get(category, "")
34 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/postprocessing.py:
--------------------------------------------------------------------------------
1 | from sigma.processing.pipeline import QueryPostprocessingItem
2 | from sigma.processing.postprocessing import QueryPostprocessingTransformation
3 | from sigma.rule import SigmaRule
4 |
5 | from ..kusto_common.conditions import QueryTableSetCondition
6 |
7 |
class _PrependQueryTableTransformation(QueryPostprocessingTransformation):
    """Postprocessing transformation that prefixes the query with the pipeline's selected query table."""

    def apply(self, pipeline: "sigma.processing.pipeline.ProcessingPipeline", rule: SigmaRule, query: str) -> str:  # type: ignore # noqa: F821
        # 'query_table' is guaranteed present: the item below is gated by QueryTableSetCondition.
        return f"{pipeline.state['query_table']}\n| where {query}"


# Public postprocessing item. Previously the transformation class shared this name and
# was immediately shadowed by this assignment; the class has been renamed so the public
# module attribute (this item) is unchanged while the shadowing is removed.
PrependQueryTablePostprocessingItem = QueryPostprocessingItem(
    identifier="kusto_prepend_query_table",
    transformation=_PrependQueryTableTransformation(),
    rule_conditions=[
        QueryTableSetCondition(),
    ],
)
20 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/schema.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import Dict, List, Optional, Union
3 |
4 |
@dataclass
class FieldInfo:
    """Metadata for a single table field."""

    # Data type of the field, as supplied by the generated schema tables.
    data_type: str
    # Human-readable description of the field.
    description: str
9 |
10 |
@dataclass
class TableSchema:
    """Field catalog for a single table, keyed by field name."""

    fields: Dict[str, FieldInfo] = field(default_factory=dict)

    def get_field_type(self, field_name: str) -> Optional[str]:
        """Return the field's data type, or None for unknown fields."""
        info = self.fields.get(field_name)
        return None if not info else info.data_type

    def get_field_description(self, field_name: str) -> Optional[str]:
        """Return the field's description, or None for unknown fields."""
        info = self.fields.get(field_name)
        return None if not info else info.description

    def get_valid_fields(self) -> List[str]:
        """Return the names of all known fields."""
        return [*self.fields]
25 |
26 |
@dataclass
class BaseSchema:
    """Schema for a set of tables, keyed by table name; lookups delegate to each TableSchema."""

    tables: Dict[str, TableSchema] = field(default_factory=dict)

    def get_field_type(self, table_name: str, field_name: str) -> Optional[str]:
        """Return the data type of the field in the given table, or None if unknown."""
        table = self.tables.get(table_name)
        if not table:
            return None
        return table.get_field_type(field_name)

    def get_field_description(self, table_name: str, field_name: str) -> Optional[str]:
        """Return the description of the field in the given table, or None if unknown."""
        table = self.tables.get(table_name)
        if not table:
            return None
        return table.get_field_description(field_name)

    def get_valid_fields(self, table_name: str) -> List[str]:
        """Return all field names of the given table; empty list for unknown tables."""
        table = self.tables.get(table_name)
        if not table:
            return []
        return table.get_valid_fields()
42 |
43 |
@dataclass
class FieldMappings:
    """Sigma-field to table-field mappings: per-table mappings plus table-agnostic fallbacks."""

    table_mappings: Dict[str, Dict[str, Union[str, List[str]]]] = field(default_factory=dict)
    generic_mappings: Dict[str, str] = field(default_factory=dict)

    def get_field_mapping(self, table_name: str, sigma_field: str) -> str:
        """Resolve sigma_field for table_name; fall back to generic mappings, then to the field name itself."""
        mapped = self.table_mappings.get(table_name, {}).get(sigma_field)
        if not mapped:
            return self.generic_mappings.get(sigma_field, sigma_field)
        if isinstance(mapped, list):
            # Multi-target mappings resolve to their first entry.
            return mapped[0]
        return mapped
55 |
56 |
def create_schema(schema_class, tables) -> BaseSchema:
    """Build a schema_class instance from a nested {table: {field: {"data_type", "description"}}} dict."""
    schema = schema_class()
    for table_name, table_fields in tables.items():
        table = TableSchema()
        for field_name, info in table_fields.items():
            table.fields[field_name] = FieldInfo(
                data_type=info["data_type"],
                description=info["description"],
            )
        schema.tables[table_name] = table
    return schema
67 |
--------------------------------------------------------------------------------
/sigma/pipelines/kusto_common/transformations.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from dataclasses import dataclass, field
4 | from typing import Any, Dict, Iterable, List, Optional, Union
5 |
6 | from sigma.conditions import ConditionOR
7 | from sigma.processing.transformations import (
8 | DetectionItemTransformation,
9 | FieldMappingTransformation,
10 | Transformation,
11 | ValueTransformation,
12 | )
13 | from sigma.rule import SigmaDetection, SigmaDetectionItem
14 | from sigma.types import SigmaString, SigmaType
15 |
16 | from ..kusto_common.mappings import get_table_from_eventid
17 | from ..kusto_common.schema import FieldMappings
18 | from .errors import InvalidHashAlgorithmError, SigmaTransformationError
19 |
20 |
class DynamicFieldMappingTransformation(FieldMappingTransformation):
    """
    Dynamically sets the mapping dictionary based on the pipeline state or rule's category.

    :param field_mappings: A FieldMappings schema object that contains the table_mappings and generic_mappings.
    :type field_mappings: FieldMappings schema object
    """

    def __init__(self, field_mappings: FieldMappings):
        # Initialize the base class with the generic mappings; the table-specific
        # mapping is swapped in per rule by set_dynamic_mapping() below.
        super().__init__(field_mappings.generic_mappings)  # type: ignore
        self.field_mappings = field_mappings

    def set_dynamic_mapping(self, pipeline):
        """
        Set the mapping dynamically based on the pipeline state 'query_table' or the rule's logsource category.
        """

        # We should always have a query_table in the pipeline state, will implement mapping based on rule category later if not
        if "query_table" in pipeline.state:
            query_table = pipeline.state["query_table"]
            # Unknown tables fall back to an empty mapping, i.e. no table-specific renames.
            self.mapping = self.field_mappings.table_mappings.get(query_table, {})
        else:
            # TODO: Implement mapping based on rule category
            pass

    def apply(
        self,
        pipeline: "sigma.processing.pipeline.ProcessingPipeline",  # noqa: F821 # type: ignore
        rule: Union["SigmaRule", "SigmaCorrelationRule"],  # noqa: F821 # type: ignore
    ) -> None:
        """Apply dynamic mapping before the field name transformations."""
        self.set_dynamic_mapping(pipeline)  # Dynamically update the mapping
        super().apply(pipeline, rule)  # Call parent method to continue the transformation process
54 |
55 |
class GenericFieldMappingTransformation(FieldMappingTransformation):
    """
    Transformation for applying generic field mappings after table-specific mappings.
    """

    def __init__(self, field_mappings: FieldMappings):
        super().__init__(field_mappings.generic_mappings)  # type: ignore

    def apply_detection_item(
        self, detection_item: SigmaDetectionItem
    ) -> Optional[Union[SigmaDetectionItem, SigmaString]]:
        # Rename the field in place when a generic mapping exists; unmapped fields pass through unchanged.
        if detection_item.field in self.mapping:
            detection_item.field = self.mapping[detection_item.field]  # type: ignore
        return detection_item
70 |
71 |
class BaseHashesValuesTransformation(DetectionItemTransformation):
    """
    Base class for transforming the Hashes field to get rid of the hash algorithm prefix in each value and create new detection items for each hash type.
    """

    def __init__(self, valid_hash_algos: List[str], field_prefix: Optional[str] = None, drop_algo_prefix: bool = False):
        """
        :param valid_hash_algos: A list of valid hash algorithms that are supported by the table.
        :param field_prefix: The prefix to use for the new detection items.
        :param drop_algo_prefix: Whether to drop the algorithm prefix in the new field name, e.g. "FileHashSHA256" -> "FileHash".
        """
        self.valid_hash_algos = valid_hash_algos
        self.field_prefix = field_prefix or ""
        self.drop_algo_prefix = drop_algo_prefix

    def apply_detection_item(
        self, detection_item: SigmaDetectionItem
    ) -> Optional[Union[SigmaDetection, SigmaDetectionItem]]:
        """Split "ALGO|VALUE" / "ALGO=VALUE" entries into one OR-linked detection item per algorithm field.

        Raises InvalidHashAlgorithmError when no value yields a recognizable algorithm.
        """
        to_return = []
        no_valid_hash_algo = True
        algo_dict = defaultdict(list)  # map to keep track of algos and lists of values
        if not isinstance(detection_item.value, list):
            detection_item.value = [detection_item.value]
        for d in detection_item.value:
            hash_value = d.to_plain().split("|")  # sometimes if ALGO|VALUE
            if len(hash_value) == 1:  # and sometimes its ALGO=VALUE
                hash_value = hash_value[0].split("=")
            if len(hash_value) == 2:
                # Explicit algorithm prefix; only accepted when listed in valid_hash_algos
                # (leading '*' wildcards from contains/endswith modifiers are stripped first).
                hash_algo = (
                    hash_value[0].lstrip("*").upper()
                    if hash_value[0].lstrip("*").upper() in self.valid_hash_algos
                    else ""
                )
                if hash_algo:
                    no_valid_hash_algo = False
                hash_value = hash_value[1]
            else:
                # Bare value with no prefix: infer the algorithm from the hex-digest length.
                # NOTE(review): this branch does not consult valid_hash_algos (e.g. a 128-char
                # value maps to SHA512 even when SHA512 is not in the list) — confirm intended.
                hash_value = hash_value[0]
                if len(hash_value) == 32:  # MD5
                    hash_algo = "MD5"
                    no_valid_hash_algo = False
                elif len(hash_value) == 40:  # SHA1
                    hash_algo = "SHA1"
                    no_valid_hash_algo = False
                elif len(hash_value) == 64:  # SHA256
                    hash_algo = "SHA256"
                    no_valid_hash_algo = False
                elif len(hash_value) == 128:  # SHA512
                    hash_algo = "SHA512"
                    no_valid_hash_algo = False
                else:  # Invalid algo, no fieldname for keyword search
                    hash_algo = ""

            # Field name is prefix + algorithm (or just the prefix when drop_algo_prefix).
            field_name = self.field_prefix
            if not self.drop_algo_prefix:
                field_name += hash_algo
            algo_dict[field_name].append(hash_value)
        if no_valid_hash_algo:
            raise InvalidHashAlgorithmError(
                "No valid hash algo found in Hashes field. Please use one of the following: "
                + ", ".join(self.valid_hash_algos)
            )
        for k, v in algo_dict.items():
            if k:  # Filter out invalid hash algo types
                to_return.append(
                    SigmaDetectionItem(
                        field=k if k != "keyword" else None, modifiers=[], value=[SigmaString(x) for x in v]
                    )
                )
        return SigmaDetection(detection_items=to_return, item_linking=ConditionOR)
142 |
143 |
@dataclass
class SetQueryTableStateTransformation(Transformation):
    """Sets rule query table in pipeline state query_table key

    The following priority is used to determine the value to set:
    1) The value provided in the val argument
    2) If the query_table is already set in the pipeline state, use that value (e.g. set in a previous pipeline, like via YAML in sigma-cli for user-defined query tables)
    3) If the rule's logsource category is present in the category_to_table_mappings dictionary, use that value
    4) If the rule has an EventID, use the table name from the eventid_to_table_mappings dictionary
    5) If none of the above are present, raise an error

    :param val: The table name to set in the pipeline state. If not provided, the table name will be determined from the rule's logsource category.
    :param category_to_table_mappings: A dictionary mapping logsource categories to table names. If not provided, the default category_to_table_mappings will be used.

    """

    val: Any = None
    category_to_table_mappings: Dict[str, Any] = field(default_factory=dict)
    event_id_category_to_table_mappings: Dict[str, Any] = field(default_factory=dict)

    def apply_detection_item(self, detection_item: SigmaDetectionItem) -> Optional[str]:
        """
        Apply transformation on detection item. We need to set the query_table pipeline state key, so we return the table name string based on the EventID or EventCode.
        """
        if detection_item.field == "EventID" or detection_item.field == "EventCode":
            # First EventID/EventCode value that maps to a table wins.
            # NOTE(review): int(value.to_plain()) will raise ValueError for
            # non-numeric EventID values — confirm upstream guarantees numeric IDs.
            for value in detection_item.value:
                if table_name := get_table_from_eventid(
                    int(value.to_plain()), self.event_id_category_to_table_mappings
                ):
                    return table_name
        return None

    def apply_detection(self, detection: SigmaDetection) -> Optional[str]:
        """Apply transformation on detection. We need to set the event_type custom attribute on the rule, so we return the event_type string."""
        for i, detection_item in enumerate(detection.detection_items):
            if isinstance(detection_item, SigmaDetection):  # recurse into nested detection items
                self.apply_detection(detection_item)
            else:
                if (
                    self.processing_item is None
                    or self.processing_item.match_detection_item(self._pipeline, detection_item)
                ) and (r := self.apply_detection_item(detection_item)) is not None:
                    self.processing_item_applied(detection.detection_items[i])
                    return r

    def apply(self, pipeline: "ProcessingPipeline", rule: "SigmaRule") -> None:  # type: ignore # noqa: F821
        """Resolve the query table for the rule (see class docstring for priority) and store it in pipeline state."""
        super().apply(pipeline, rule)

        # Init table_name to None, will be set in the following if statements
        table_name = None
        # Set table_name based on the following priority:
        # 1) The value provided in the val argument
        if self.val:
            table_name = self.val
        # 2) If the query_table is already set in the pipeline state, use that value (e.g. set in a previous pipeline, like via YAML in sigma-cli for user-defined query tables)
        elif pipeline.state.get("query_table"):
            table_name = pipeline.state.get("query_table")
        # 3) If the rule's logsource category is present in the category_to_table_mappings dictionary, use that value
        elif rule.logsource.category:
            category = rule.logsource.category
            table_name = self.category_to_table_mappings.get(category)
        # 4) Check if the rule has an EventID, use the table name from the eventid_to_table_mappings dictionary
        else:
            for section_title, detection in rule.detection.detections.items():
                # We only want event types from selection sections, not filters
                if re.match(r"^sel.*", section_title.lower()):
                    if (r := self.apply_detection(detection)) is not None:
                        table_name = r
                        break

        if table_name:
            if isinstance(table_name, list):
                table_name = table_name[0]  # Use the first table if it's a list
            pipeline.state["query_table"] = table_name
        else:
            raise SigmaTransformationError(
                f"Unable to determine table name from rule. The query table is determined in the following order of priority:\n"
                f" 1) The value provided to processing pipeline's query_table parameter, if using a Python script.\n"
                f" 2) If the query_table is already set in the pipeline state, such as from a custom user-defined pipeline if using sigma-cli.\n"
                f" 3) If the rule's logsource category is present in the pipeline's category_to_table_mappings dictionary in mappings.py, use that value.\n"
                f" 4) If the rule has an EventID, use the table name from the pipeline's eventid_to_table_mappings dictionary in mappings.py.\n"
                f"For more details, see https://github.com/AttackIQ/pySigma-backend-kusto/blob/main/README.md#%EF%B8%8F-custom-table-names-new-in-030-beta."
            )
227 |
228 |
## Change field value AFTER field transformations from Sysmon values to values expected in the pipelines registry table action field
class RegistryActionTypeValueTransformation(ValueTransformation):
    """Custom ValueTransformation transformation. The Microsoft DeviceRegistryEvents table expect the ActionType to
    be a slightly different set of values than what Sysmon specified, so this will change them to the correct value."""

    value_mappings = {  # Sysmon EventType -> DeviceRegistryEvents ActionType
        "CreateKey": "RegistryKeyCreated",
        "DeleteKey": ["RegistryKeyDeleted", "RegistryValueDeleted"],
        "SetValue": "RegistryValueSet",
        "RenameKey": ["RegistryValueSet", "RegistryKeyCreated"],
    }

    def apply_value(self, field: str, val: SigmaType) -> Optional[Union[SigmaType, Iterable[SigmaType]]]:
        """Map a Sysmon EventType value to its DeviceRegistryEvents ActionType equivalent(s)."""
        plain_value = val.to_plain()
        mapped = self.value_mappings.get(plain_value, plain_value)
        # Ambiguous Sysmon values map to several ActionTypes and expand to a value list (OR).
        if isinstance(mapped, list):
            return [SigmaString(item) for item in mapped]
        return SigmaString(mapped)
246 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoft365defender/__init__.py:
--------------------------------------------------------------------------------
1 | from .microsoft365defender import microsoft_365_defender_pipeline
2 |
# Registry of pipeline entry points exposed by this package.
pipelines = {
    "microsoft_365_defender_pipeline": microsoft_365_defender_pipeline,  # DEPRECATED: Use microsoft_xdr_pipeline instead.
}
6 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoft365defender/microsoft365defender.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sigma.processing.pipeline import ProcessingPipeline
4 |
5 | from ..microsoftxdr import microsoft_xdr_pipeline
6 |
7 |
def microsoft_365_defender_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """DEPRECATED: Use microsoft_xdr_pipeline instead.

    Thin wrapper that forwards both arguments to microsoft_xdr_pipeline unchanged.

    :param transform_parent_image: passed through to microsoft_xdr_pipeline.
    :param query_table: optional query table name, passed through to microsoft_xdr_pipeline.
    :return: the ProcessingPipeline built by microsoft_xdr_pipeline.
    """
    import warnings

    # The docstring already declared this deprecated; emit a runtime warning so
    # callers are actually notified.
    warnings.warn(
        "microsoft_365_defender_pipeline is deprecated; use microsoft_xdr_pipeline instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return microsoft_xdr_pipeline(transform_parent_image, query_table)
13 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/__init__.py:
--------------------------------------------------------------------------------
1 | from .microsoftxdr import microsoft_xdr_pipeline
2 |
# Registry of pipeline entry points exposed by this package.
pipelines = {
    "microsoft_xdr_pipeline": microsoft_xdr_pipeline,
}
6 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/mappings.py:
--------------------------------------------------------------------------------
1 | from sigma.pipelines.common import (
2 | logsource_windows_file_access,
3 | logsource_windows_file_change,
4 | logsource_windows_file_delete,
5 | logsource_windows_file_event,
6 | logsource_windows_file_rename,
7 | logsource_windows_image_load,
8 | logsource_windows_network_connection,
9 | logsource_windows_process_creation,
10 | logsource_windows_registry_add,
11 | logsource_windows_registry_delete,
12 | logsource_windows_registry_event,
13 | logsource_windows_registry_set,
14 | )
15 | from sigma.pipelines.kusto_common.schema import FieldMappings
16 |
## Rule Categories -> Query Table Names
# Maps a Sigma rule's logsource category to the XDR advanced hunting table to query.
CATEGORY_TO_TABLE_MAPPINGS = {
    "process_creation": "DeviceProcessEvents",
    "image_load": "DeviceImageLoadEvents",
    "file_access": "DeviceFileEvents",
    "file_change": "DeviceFileEvents",
    "file_delete": "DeviceFileEvents",
    "file_event": "DeviceFileEvents",
    "file_rename": "DeviceFileEvents",
    "registry_add": "DeviceRegistryEvents",
    "registry_delete": "DeviceRegistryEvents",
    "registry_event": "DeviceRegistryEvents",
    "registry_set": "DeviceRegistryEvents",
    "network_connection": "DeviceNetworkEvents",
}

# Maps an EventID-derived category (keys match kusto_common.mappings.EVENTID_CATEGORIES)
# to the XDR table used when only an EventID is available to select a table.
EVENTID_CATEGORY_TO_TABLE_MAPPINGS = {
    "process": "DeviceProcessEvents",
    "logon": "DeviceLogonEvents",
    "registry": "DeviceRegistryEvents",
    "file": "DeviceFileEvents",
    "network": "DeviceNetworkEvents",
    "image_load": "DeviceImageLoadEvents",
    "pipe": "DeviceEvents",
    "wmi": "DeviceEvents",
}

## Rule Categories -> RuleConditions
# Pre-built logsource conditions per category — presumably used to gate
# category-specific processing items in microsoftxdr.py; verify against that module.
CATEGORY_TO_CONDITIONS_MAPPINGS = {
    "process_creation": logsource_windows_process_creation(),
    "image_load": logsource_windows_image_load(),
    "file_access": logsource_windows_file_access(),
    "file_change": logsource_windows_file_change(),
    "file_delete": logsource_windows_file_delete(),
    "file_event": logsource_windows_file_event(),
    "file_rename": logsource_windows_file_rename(),
    "registry_add": logsource_windows_registry_add(),
    "registry_delete": logsource_windows_registry_delete(),
    "registry_event": logsource_windows_registry_event(),
    "registry_set": logsource_windows_registry_set(),
    "network_connection": logsource_windows_network_connection(),
}
59 |
60 |
class MicrosoftXDRFieldMappings(FieldMappings):
    """Field mappings for Microsoft XDR advanced hunting tables; behavior comes entirely from FieldMappings."""

    pass


# Sigma/Sysmon field name -> XDR table column, per query table, plus table-agnostic
# generic mappings applied when no table-specific mapping matched. Commented-out
# entries record Sysmon fields with no known XDR equivalent.
MICROSOFT_XDR_FIELD_MAPPINGS = MicrosoftXDRFieldMappings(
    table_mappings={
        "DeviceProcessEvents": {  # process_creation, Sysmon EventID 1 -> DeviceProcessEvents table
            # ProcessGuid: ?,
            "ProcessId": "ProcessId",
            "Image": "FolderPath",
            "FileVersion": "ProcessVersionInfoProductVersion",
            "Description": "ProcessVersionInfoFileDescription",
            "Product": "ProcessVersionInfoProductName",
            "Company": "ProcessVersionInfoCompanyName",
            "OriginalFileName": "ProcessVersionInfoOriginalFileName",
            "CommandLine": "ProcessCommandLine",
            # CurrentDirectory: ?
            "User": "AccountName",
            # LogonGuid: ?
            "LogonId": "LogonId",
            # TerminalSessionId: ?
            "IntegrityLevel": "ProcessIntegrityLevel",
            "sha1": "SHA1",
            "sha256": "SHA256",
            "md5": "MD5",
            # 'ParentProcessGuid': ?,
            "ParentProcessId": "InitiatingProcessId",
            "ParentImage": "InitiatingProcessFolderPath",
            "ParentCommandLine": "InitiatingProcessCommandLine",
            "ParentUser": "InitiatingProcessAccountName",
            "GrandParentImage": "InitiatingProcessParentFileName",
        },
        "DeviceImageLoadEvents": {
            # 'ProcessGuid': ?,
            "ProcessId": "InitiatingProcessId",
            "Image": "InitiatingProcessFolderPath",  # File path of the process that loaded the image
            "ImageLoaded": "FolderPath",
            "FileVersion": "InitiatingProcessVersionInfoProductVersion",
            "Description": "InitiatingProcessVersionInfoFileDescription",
            "Product": "InitiatingProcessVersionInfoProductName",
            "Company": "InitiatingProcessVersionInfoCompanyName",
            "OriginalFileName": "InitiatingProcessVersionInfoOriginalFileName",
            # 'Hashes': ?,
            "sha1": "SHA1",
            "sha256": "SHA256",
            "md5": "MD5",
            # 'Signed': ?
            # 'Signature': ?
            # 'SignatureStatus': ?
            "User": "InitiatingProcessAccountName",
        },
        "DeviceFileEvents": {  # file_*, Sysmon EventID 11 (create), 23 (delete) -> DeviceFileEvents table
            # 'ProcessGuid': ?,
            "ProcessId": "InitiatingProcessId",
            "Image": "InitiatingProcessFolderPath",
            "TargetFilename": "FolderPath",
            # 'CreationUtcTime': 'Timestamp',
            "User": "RequestAccountName",
            # 'Hashes': ?,
            "sha1": "SHA1",
            "sha256": "SHA256",
            "md5": "MD5",
            "ObjectName": "FolderPath",
        },
        "DeviceNetworkEvents": {  # network_connection, Sysmon EventID 3 -> DeviceNetworkEvents table
            # 'ProcessGuid': ?,
            "ProcessId": "InitiatingProcessId",
            "Image": "InitiatingProcessFolderPath",
            "User": "InitiatingProcessAccountName",
            "Protocol": "Protocol",
            # 'Initiated': ?,
            # 'SourceIsIpv6': ?,
            "SourceIp": "LocalIP",
            "SourceHostname": "DeviceName",
            "SourcePort": "LocalPort",
            # 'SourcePortName': ?,
            # 'DestinationIsIpv6': ?,
            "DestinationIp": "RemoteIP",
            "DestinationHostname": "RemoteUrl",
            "DestinationPort": "RemotePort",
            # 'DestinationPortName': ?,
        },
        "DeviceRegistryEvents": {
            # registry_*, Sysmon EventID 12 (create/delete), 13 (value set), 14 (key/value rename) -> DeviceRegistryEvents table,
            "EventType": "ActionType",
            # 'ProcessGuid': ?,
            "ProcessId": "InitiatingProcessId",
            "Image": "InitiatingProcessFolderPath",
            "TargetObject": "RegistryKey",
            # 'NewName': ?
            "Details": "RegistryValueData",
            "User": "InitiatingProcessAccountName",
            "ObjectName": "RegistryKey",
        },
    },
    generic_mappings={
        "EventType": "ActionType",
        "User": "InitiatingProcessAccountName",
        "CommandLine": "InitiatingProcessCommandLine",
        "Image": "InitiatingProcessFolderPath",
        "ProcessName": "InitiatingProcessFolderPath",
        "SourceImage": "InitiatingProcessFolderPath",
        "ProcessId": "InitiatingProcessId",
        "md5": "InitiatingProcessMD5",
        "sha1": "InitiatingProcessSHA1",
        "sha256": "InitiatingProcessSHA256",
        "ParentProcessId": "InitiatingProcessParentId",
        "ParentCommandLine": "InitiatingProcessParentCommandLine",
        "Company": "InitiatingProcessVersionInfoCompanyName",
        "Description": "InitiatingProcessVersionInfoFileDescription",
        "OriginalFileName": "InitiatingProcessVersionInfoOriginalFileName",
        "Product": "InitiatingProcessVersionInfoProductName",
    },
)
175 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/microsoftxdr.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sigma.processing.conditions import (
4 | DetectionItemProcessingItemAppliedCondition,
5 | ExcludeFieldCondition,
6 | IncludeFieldCondition,
7 | LogsourceCondition,
8 | RuleProcessingItemAppliedCondition,
9 | RuleProcessingStateCondition,
10 | )
11 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline
12 | from sigma.processing.transformations import (
13 | DropDetectionItemTransformation,
14 | FieldMappingTransformation,
15 | ReplaceStringTransformation,
16 | RuleFailureTransformation,
17 | )
18 |
19 | from ..kusto_common.errors import InvalidFieldTransformation
20 | from ..kusto_common.postprocessing import PrependQueryTablePostprocessingItem
21 | from ..kusto_common.schema import create_schema
22 | from ..kusto_common.transformations import (
23 | DynamicFieldMappingTransformation,
24 | GenericFieldMappingTransformation,
25 | RegistryActionTypeValueTransformation,
26 | SetQueryTableStateTransformation,
27 | )
28 | from .mappings import (
29 | CATEGORY_TO_TABLE_MAPPINGS,
30 | EVENTID_CATEGORY_TO_TABLE_MAPPINGS,
31 | MICROSOFT_XDR_FIELD_MAPPINGS,
32 | )
33 | from .schema import MicrosoftXDRSchema
34 | from .tables import MICROSOFT_XDR_TABLES
35 | from .transformations import (
36 | ParentImageValueTransformation,
37 | SplitDomainUserTransformation,
38 | XDRHashesValuesTransformation,
39 | )
40 |
# Schema object describing every Microsoft XDR table and its valid fields,
# built from the static table definitions in tables.py.
MICROSOFT_XDR_SCHEMA = create_schema(MicrosoftXDRSchema, MICROSOFT_XDR_TABLES)

# Mapping from ParentImage to InitiatingProcessParentFileName. Must be used alongside of ParentImageValueTransformation
parent_image_field_mapping = {"ParentImage": "InitiatingProcessParentFileName"}

# Drop EventID field
# EventID/EventCode/ObjectType detection items are removed from the rule —
# presumably because the query table is derived from them elsewhere
# (EVENTID_CATEGORY_TO_TABLE_MAPPINGS is passed to SetQueryTableStateTransformation).
drop_eventid_proc_item = ProcessingItem(
    identifier="microsoft_xdr_drop_eventid",
    transformation=DropDetectionItemTransformation(),
    field_name_conditions=[IncludeFieldCondition(["EventID", "EventCode", "ObjectType"])],
)
52 |
53 |
## Fieldmappings
# Table-aware ("dynamic") mapping: resolves each Sigma field via the mapping
# for the rule's selected table (see kusto_common.transformations).
fieldmappings_proc_item = ProcessingItem(
    identifier="microsoft_xdr_table_fieldmappings",
    transformation=DynamicFieldMappingTransformation(MICROSOFT_XDR_FIELD_MAPPINGS),
)

## Generic Field Mappings, keep this last
## Exclude any fields already mapped, e.g. if a table mapping has been applied.
# This will fix the case where ProcessId is usually mapped to InitiatingProcessId, EXCEPT for the DeviceProcessEvent table where it stays as ProcessId.
# So we can map ProcessId to ProcessId in the DeviceProcessEvents table mapping, and prevent the generic mapping to InitiatingProcessId from being applied
# by adding a detection item condition that the table field mappings have been applied

generic_field_mappings_proc_item = ProcessingItem(
    identifier="microsoft_xdr_generic_fieldmappings",
    transformation=GenericFieldMappingTransformation(MICROSOFT_XDR_FIELD_MAPPINGS),
    # Negated "any" linking: apply only to detection items NOT already handled
    # by the table-specific mapping item above.
    detection_item_conditions=[DetectionItemProcessingItemAppliedCondition("microsoft_xdr_table_fieldmappings")],
    detection_item_condition_linking=any,
    detection_item_condition_negation=True,
)
73 |
74 |
## Field Value Replacements ProcessingItems
replacement_proc_items = [
    # Sysmon uses abbreviations in RegistryKey values, replace with full key names as the DeviceRegistryEvents schema
    # expects them to be
    # Note: Ensure this comes AFTER field mapping renames, as we're specifying DeviceRegistryEvent fields
    #
    # Do this one first, or else the HKLM only one will replace HKLM and mess up the regex
    ProcessingItem(
        identifier="microsoft_xdr_registry_key_replace_currentcontrolset",
        # NOTE(review): replacement expands to "...\\SYSTEM\\CurrentControlSet001";
        # DeviceRegistryEvents values are commonly rendered as "ControlSet001" —
        # confirm the intended replacement string.
        transformation=ReplaceStringTransformation(
            regex=r"(?i)(^HKLM\\SYSTEM\\CurrentControlSet)",
            replacement=r"HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet001",
        ),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_registry_key_replace_hklm",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKLM)", replacement=r"HKEY_LOCAL_MACHINE"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_registry_key_replace_hku",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKU)", replacement=r"HKEY_USERS"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_registry_key_replace_hkcr",
        # NOTE(review): HKCR is expanded to HKEY_LOCAL_MACHINE\\CLASSES; verify this
        # matches how DeviceRegistryEvents renders classes-root keys.
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_registry_actiontype_value",
        # Rewrites ActionType values for registry events (see
        # kusto_common.transformations.RegistryActionTypeValueTransformation).
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["ActionType"])],
    ),
    # Extract Domain from Username fields
    ProcessingItem(
        identifier="microsoft_xdr_domain_username_extract",
        transformation=SplitDomainUserTransformation(),
        field_name_conditions=[IncludeFieldCondition(["AccountName", "InitiatingProcessAccountName"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_hashes_field_values",
        # Expands a combined Hashes value into per-algorithm fields (MD5/SHA1/SHA256),
        # see XDRHashesValuesTransformation.
        transformation=XDRHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="microsoft_xdr_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]
129 |
# ParentImage -> InitiatingProcessParentFileName
parent_image_proc_items = [
    # First apply fieldmapping from ParentImage to InitiatingProcessParentFileName for non process-creation rules
    ProcessingItem(
        identifier="microsoft_xdr_parent_image_fieldmapping",
        transformation=FieldMappingTransformation(parent_image_field_mapping), # type: ignore
        rule_conditions=[
            # Exclude process_creation events, there's direct field mapping in this schema table
            LogsourceCondition(category="process_creation")
        ],
        rule_condition_negation=True,
    ),
    # Second, extract the parent process name from the full path, since
    # InitiatingProcessParentFileName holds a file name rather than a path.
    ProcessingItem(
        identifier="microsoft_xdr_parent_image_name_value",
        transformation=ParentImageValueTransformation(),
        field_name_conditions=[
            IncludeFieldCondition(["InitiatingProcessParentFileName"]),
        ],
        rule_conditions=[
            # Exclude process_creation events, there's direct field mapping in this schema table
            LogsourceCondition(category="process_creation")
        ],
        rule_condition_negation=True,
    ),
]
156 |
# Exceptions/Errors ProcessingItems
# Catch-all for when the query table is not set, meaning the rule could not be mapped to a table or the table name was not set
rule_error_proc_items = [
    # Category Not Supported or Query Table Not Set
    ProcessingItem(
        identifier="microsoft_xdr_unsupported_rule_category_or_missing_query_table",
        transformation=RuleFailureTransformation(
            "Rule category not yet supported by the Microsoft XDR pipeline or query_table is not set."
        ),
        # Fires only after table selection ran AND the query_table state is still unset.
        rule_conditions=[
            RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"), # type: ignore
            RuleProcessingStateCondition("query_table", None), # type: ignore
        ],
        rule_condition_linking=all,
    )
]
173 |
174 |
def get_valid_fields(table_name):
    """Collect every field name accepted for *table_name*.

    Combines the table's native schema fields, the per-table mapping keys,
    the generic mapping keys, and the synthetic ``Hashes`` field handled by
    XDRHashesValuesTransformation. Order is preserved and duplicates are
    kept, matching how callers consume the result.
    """
    schema_fields = MICROSOFT_XDR_SCHEMA.tables[table_name].fields
    table_mapped = MICROSOFT_XDR_FIELD_MAPPINGS.table_mappings.get(table_name, {})
    generic_mapped = MICROSOFT_XDR_FIELD_MAPPINGS.generic_mappings
    return [*schema_fields, *table_mapped, *generic_mapped, "Hashes"]
182 |
183 |
def _make_table_field_error_item(table_name):
    """Build the unsupported-field ProcessingItem for a single XDR table."""
    valid_fields = get_valid_fields(table_name)
    return ProcessingItem(
        identifier=f"microsoft_xdr_unsupported_fields_{table_name}",
        transformation=InvalidFieldTransformation(
            f"Please use valid fields for the {table_name} table, or the following fields that have keymappings in this "
            f"pipeline:\n{', '.join(sorted(set(valid_fields)))}"
        ),
        field_name_conditions=[ExcludeFieldCondition(fields=valid_fields)],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"),
            RuleProcessingStateCondition("query_table", table_name),
        ],
        rule_condition_linking=all,
    )


# One unsupported-field error item per known table.
field_error_proc_items = [_make_table_field_error_item(name) for name in MICROSOFT_XDR_SCHEMA.tables]
204 |
# Add a catch-all error for custom table names
# (query_table state is None here, so only generic mapping keys and Hashes are known-valid)
field_error_proc_items.append(
    ProcessingItem(
        identifier="microsoft_xdr_unsupported_fields_custom",
        transformation=InvalidFieldTransformation(
            "Invalid field name for the custom table. Please ensure you're using valid fields for your custom table."
        ),
        field_name_conditions=[
            ExcludeFieldCondition(fields=list(MICROSOFT_XDR_FIELD_MAPPINGS.generic_mappings.keys()) + ["Hashes"])
        ],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"), # type: ignore
            RuleProcessingStateCondition("query_table", None), # type: ignore
        ],
        rule_condition_linking=all,
    )
)
222 |
223 |
def microsoft_xdr_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """Build the ProcessingPipeline transforming SigmaRules for the Kusto Query Language backend.
    Field mappings based on documentation found here:
    https://learn.microsoft.com/en-us/microsoft-365/security/defender/advanced-hunting-query-language?view=o365-worldwide

    :param transform_parent_image: If True, the ParentImage field will be mapped to
        InitiatingProcessParentFileName and reduced to the parent process file name, because the
        Microsoft 365 Defender table schema does not contain an InitiatingProcessParentFolderPath
        field like it does for InitiatingProcessFolderPath.
        i.e. ParentImage: C:\\Windows\\System32\\whoami.exe -> InitiatingProcessParentFileName: whoami.exe.
        Defaults to True
    :type transform_parent_image: Optional[bool]
    :param query_table: If specified, the table name will be used in the finalizer, otherwise the
        table name will be selected based on the category of the rule.
    :type query_table: Optional[str]

    :return: ProcessingPipeline for Microsoft 365 Defender Backend
    :rtype: ProcessingPipeline
    """

    set_query_table_item = ProcessingItem(
        identifier="microsoft_xdr_set_query_table",
        transformation=SetQueryTableStateTransformation(
            query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
        ),
    )

    # ParentImage handling sits between the generic field mappings and the
    # value-replacement items (same position as the original 4:4 insertion).
    optional_parent_image_items = parent_image_proc_items if transform_parent_image else []

    pipeline_items = [
        set_query_table_item,
        drop_eventid_proc_item,
        fieldmappings_proc_item,
        generic_field_mappings_proc_item,
        *optional_parent_image_items,
        *replacement_proc_items,
        *rule_error_proc_items,
        *field_error_proc_items,
    ]

    return ProcessingPipeline(
        name="Generic Log Sources to Windows XDR tables and fields",
        priority=10,
        items=pipeline_items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )
269 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/schema.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 | from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings
4 |
5 |
@dataclass
class MicrosoftXDRSchema(BaseSchema):
    """Microsoft XDR table schema; all behavior is inherited from BaseSchema."""

    pass
9 |
10 |
@dataclass
class MicrosoftXDRFieldMappings(FieldMappings):
    """Microsoft XDR field-mapping container; all behavior is inherited from FieldMappings."""

    pass
14 |
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/transformations.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable, Optional, Union
2 |
3 | from sigma.processing.transformations import (
4 | DetectionItemTransformation,
5 | ValueTransformation,
6 | )
7 | from sigma.rule import SigmaDetection, SigmaDetectionItem, SigmaString
8 | from sigma.types import SigmaType
9 |
10 | from ..kusto_common.transformations import BaseHashesValuesTransformation
11 |
12 |
## Custom DetectionItemTransformation to split domain and user, if applicable
class SplitDomainUserTransformation(DetectionItemTransformation):
    """Custom DetectionItemTransformation transformation to split a User field into separate domain and user fields,
    if applicable. This is to handle the case where the Sysmon `User` field may contain a domain AND username, and
    Advanced Hunting queries separate out the domain and username into separate fields.
    If a matching field_name_condition field uses the schema DOMAIN\\USER, a new SigmaDetectionItem
    will be made for the Domain and put inside a SigmaDetection with the original User SigmaDetectionItem
    (minus the domain) for the matching SigmaDetectionItem.

    You should use this with a field_name_condition for `IncludeFieldName(['field', 'names', 'for', 'username']`)"""

    # Destination field for the split-off domain, keyed by the username field name;
    # unmapped fields fall back to InitiatingProcessAccountDomain.
    # Hoisted out of the per-value loop since it is loop-invariant.
    _USERNAME_FIELD_MAPPINGS = {
        "AccountName": "AccountDomain",
        "RequestAccountName": "RequestAccountDomain",
        "InitiatingProcessAccountName": "InitiatingProcessAccountDomain",
    }

    def apply_detection_item(
        self, detection_item: SigmaDetectionItem
    ) -> Optional[Union[SigmaDetection, SigmaDetectionItem]]:
        """Split each DOMAIN\\user value into an ANDed (username, domain) item pair."""
        to_return = []
        if not isinstance(detection_item.value, list):  # Ensure its a list, but it most likely will be
            # BUGFIX: wrap the scalar value in a list. The previous `list(value)`
            # call iterated the value itself instead of wrapping it, which is not
            # a valid way to turn a single SigmaType into a one-element list.
            detection_item.value = [detection_item.value]
        for d in detection_item.value:
            username = d.to_plain().split("\\")
            if len(username) == 2:
                domain = username[0]
                username = [SigmaString(username[1])]

                domain_field = self._USERNAME_FIELD_MAPPINGS.get(detection_item.field, "InitiatingProcessAccountDomain")
                domain_value = [SigmaString(domain)]
                user_detection_item = SigmaDetectionItem(
                    field=detection_item.field,
                    modifiers=[],
                    value=username,
                )
                domain_detection_item = SigmaDetectionItem(field=domain_field, modifiers=[], value=domain_value)
                to_return.append(SigmaDetection(detection_items=[user_detection_item, domain_detection_item]))
            else:
                # No single DOMAIN\\user split: re-emit the value with the original modifiers.
                # NOTE(review): `username` here is a list of plain str (not SigmaString);
                # this relies on SigmaDetectionItem accepting/coercing raw strings — confirm.
                to_return.append(
                    SigmaDetection(
                        [
                            SigmaDetectionItem(
                                field=detection_item.field, modifiers=detection_item.modifiers, value=username
                            )
                        ]
                    )
                )
        return SigmaDetection(to_return)
61 |
62 |
# Extract parent process name from ParentImage after applying ParentImage field mapping
class ParentImageValueTransformation(ValueTransformation):
    """Reduce a full parent-image path to its trailing file name.

    None of the table schemas provide InitiatingProcessParentFolderPath the way
    they provide InitiatingProcessFolderPath, so Sysmon's `ParentImage` cannot be
    mapped to a path field directly. InitiatingProcessParentFileName is available
    in nearly all tables, so only the process file name is kept.

    Use this transformation BEFORE mapping ParentImage to InitiatingProcessFileName
    """

    def apply_value(self, field: str, val: SigmaType) -> Optional[Union[SigmaType, Iterable[SigmaType]]]:
        # Normalize both separator styles, then take everything after the last one.
        path = val.to_plain()
        file_name = path.replace("\\", "/").rsplit("/", 1)[-1]
        return SigmaString(str(file_name))
76 |
77 |
class XDRHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in XDR Tables to create fields for each hash algorithm.
    """

    def __init__(self):
        # Empty field_prefix: the produced field names are presumably the bare
        # algorithm names (MD5/SHA1/SHA256) — see BaseHashesValuesTransformation.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256"], field_prefix="")
85 |
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/__init__.py:
--------------------------------------------------------------------------------
1 | from .sentinelasim import sentinel_asim_pipeline
2 |
# Mapping of pipeline identifier -> pipeline builder callable.
pipelines = {
    "sentinel_asim": sentinel_asim_pipeline,  # TODO: adapt identifier to something appropriate
}
6 |
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/mappings.py:
--------------------------------------------------------------------------------
1 | from sigma.pipelines.common import (
2 | logsource_windows_file_access,
3 | logsource_windows_file_change,
4 | logsource_windows_file_delete,
5 | logsource_windows_file_event,
6 | logsource_windows_file_rename,
7 | logsource_windows_network_connection,
8 | logsource_windows_process_creation,
9 | logsource_windows_registry_add,
10 | logsource_windows_registry_delete,
11 | logsource_windows_registry_event,
12 | logsource_windows_registry_set,
13 | )
14 | from sigma.pipelines.kusto_common.schema import FieldMappings
15 |
16 | # from .schema import MicrosoftXDRFieldMappings
17 | from .tables import SENTINEL_ASIM_TABLES
18 |
# Get table names from the tables.py file
table_names = list(SENTINEL_ASIM_TABLES.keys())


# Rule Categories -> Query Table Names
# Use the table names from the tables.py file by looking for relevant terms in the table names
def _first_table_containing(term: str, default: str) -> str:
    """Return the first known ASIM table whose name contains *term*
    (case-insensitive), falling back to *default* when none matches."""
    return next((table for table in table_names if term in table.lower()), default)


CATEGORY_TO_TABLE_MAPPINGS = {
    "process_creation": _first_table_containing("process", "imProcessCreate"),
    # "image_load": not supported yet (no obvious ASIM image-load table)
    "file_access": _first_table_containing("file", "imFileEvent"),
    "file_change": _first_table_containing("file", "imFileEvent"),
    "file_delete": _first_table_containing("file", "imFileEvent"),
    "file_event": _first_table_containing("file", "imFileEvent"),
    "file_rename": _first_table_containing("file", "imFileEvent"),
    "registry_add": _first_table_containing("registry", "imRegistry"),
    "registry_delete": _first_table_containing("registry", "imRegistry"),
    "registry_event": _first_table_containing("registry", "imRegistry"),
    "registry_set": _first_table_containing("registry", "imRegistry"),
    "network_connection": _first_table_containing("network", "imNetworkSession"),
    "proxy": _first_table_containing("web", "imWebSession"),
    "webserver": _first_table_containing("web", "imWebSession"),
}
41 |
42 |
# EventID-derived categories -> ASIM table names; imported by sentinelasim.py
# alongside CATEGORY_TO_TABLE_MAPPINGS for query-table selection.
EVENTID_CATEGORY_TO_TABLE_MAPPINGS = {
    "process": "imProcessCreate",
    "logon": "imAuthentication",
    "registry": "imRegistry",
    "file": "imFileEvent",
    "network": "imNetworkSession",
    "image_load": "imProcessCreate",
    "pipe": "imFileEvent",
    "wmi": "imProcessCreate",
}
53 |
54 |
## Rule Categories -> RuleConditions
# Prebuilt rule-condition helpers from sigma.pipelines.common, one per supported
# category. Note: proxy/webserver have table mappings above but no entry here.
CATEGORY_TO_CONDITIONS_MAPPINGS = {
    "process_creation": logsource_windows_process_creation(),
    # "image_load": logsource_windows_image_load(),
    "file_access": logsource_windows_file_access(),
    "file_change": logsource_windows_file_change(),
    "file_delete": logsource_windows_file_delete(),
    "file_event": logsource_windows_file_event(),
    "file_rename": logsource_windows_file_rename(),
    "registry_add": logsource_windows_registry_add(),
    "registry_delete": logsource_windows_registry_delete(),
    "registry_event": logsource_windows_registry_event(),
    "registry_set": logsource_windows_registry_set(),
    "network_connection": logsource_windows_network_connection(),
}
70 |
71 |
class SentinelASIMFieldMappings(FieldMappings):
    """Field-mapping container for Sentinel ASIM.

    NOTE(review): sentinelasim/schema.py defines an identically named
    @dataclass-decorated class, and an import of it is commented out above —
    this local (undecorated) re-definition looks like duplication worth
    consolidating.
    """

    pass
74 |
75 |
# Sigma field name -> ASIM column mappings.
# table_mappings apply when the rule's query table matches the outer key;
# generic_mappings apply regardless of table. A list value means the Sigma
# field may resolve to any of several ASIM columns.
SENTINEL_ASIM_FIELD_MAPPINGS = SentinelASIMFieldMappings(
    table_mappings={
        "imAuditEvent": {
            "CommandLine": "Operation",
            "User": "ActorUsername",
            "TargetFilename": "Object",
            "Image": "ActingAppName",
            "SourceIP": "SrcIpAddr",
            "DestinationIP": "TargetIpAddr",
            "DestinationHostname": "TargetHostname",
            "EventType": "EventType",
            "TargetObject": "Object",
            "NewValue": "NewValue",
            "OldValue": "OldValue",
            "type": "ObjectType",
            "SourceHostname": "SrcHostname",
            "TargetUsername": "TargetUsername",
            "ProcessName": "ActingAppName",
            "ProcessId": "ActingAppId",
            "LogonId": "ActorSessionId",
            "TargetLogonId": "TargetSessionId",
            "SubjectUserName": "ActorUsername",
            "ObjectName": "Object",
            "ObjectType": "ObjectType",
            "NewProcessName": "ActingAppName",
            "Status": "EventResultDetails",
            "IpAddress": ["SrcIpAddr", "TargetIpAddr"],
            "SourcePort": "SrcPortNumber",
            "DestinationPort": "TargetPortNumber",
            "Protocol": "LogonProtocol",
        },
        "imAuthentication": {
            "User": ["ActorUsername", "TargetUsername"], # Alias field, can map to either
            "SourceHostname": "SrcHostname",
            "DestinationHostname": "TargetHostname",
            "SourceIP": "SrcIpAddr",
            "DestinationIP": "TargetIpAddr",
            "SourcePort": "SrcPortNumber",
            "DestinationPort": "TargetPortNumber",
            "Status": "EventResultDetails",
            "IpAddress": ["SrcIpAddr", "TargetIpAddr"], # Can map to either source or target IP
            "SubjectUserName": "ActorUsername",
            "WorkstationName": "SrcHostname", # This is an approximation
            "ComputerName": ["SrcHostname", "TargetHostname"], # Can map to either source or target hostname
            "AuthenticationPackageName": "LogonProtocol",
            "LogonProcessName": "LogonMethod",
            "TargetUserSid": "TargetUserId",
            "TargetDomainName": "TargetDomain",
            "TargetOutboundDomainName": "TargetDomain",
            "ElevatedToken": "EventType", # This could map to "Elevate" in EventType
            "TargetUserPrincipalName": "TargetUsername", # This is an approximation
            "SubjectDomainName": "ActorScope",
            "SubjectUserSid": "ActorUserId",
            "SubjectLogonId": "ActorSessionId",
            "IpPort": ["SrcPortNumber", "TargetPortNumber"], # Can map to either source or target port
            "LmPackageName": "LogonProtocol", # This is an approximation
            "userAgent": "HttpUserAgent",
            # Common fields with specific relevance to this table
            "DvcHostname": ["SrcHostname", "TargetHostname"], # Can map to either source or target hostname
            "DvcIpAddr": ["SrcIpAddr", "TargetIpAddr"], # Can map to either source or target IP
            "DvcDomain": ["SrcDomain", "TargetDomain"], # Can map to either source or target domain
            "DvcDomainType": ["SrcDomainType", "TargetDomainType"], # Can map to either source or target domain type
            "DvcFQDN": ["SrcFQDN", "TargetFQDN"], # Can map to either source or target FQDN
            "DvcId": ["SrcDvcId", "TargetDvcId"], # Can map to either source or target device ID
            "DvcIdType": ["SrcDvcIdType", "TargetDvcIdType"], # Can map to either source or target device ID type
            "DvcDescription": ["SrcDescription", "TargetDescription"], # Can map to either source or target description
            "DvcOs": ["SrcDvcOs", "TargetDvcOs"], # Can map to either source or target OS
        },
        "_Im_Dns": {
            "SourceIP": "SrcIpAddr",
            "DestinationIP": "DstIpAddr",
            "SourceHostname": "SrcHostname",
            "DestinationHostname": "DstHostname",
            "SourcePort": "SrcPortNumber",
            "DestinationPort": "DstPortNumber",
            "IpAddress": ["SrcIpAddr", "DstIpAddr"], # Can map to either source or target IP
            "ProcessName": "SrcProcessName",
            "ProcessId": "SrcProcessId",
            "User": "SrcUsername",
            "ComputerName": ["SrcHostname", "DstHostname"], # Can map to either source or target hostname
            "Image": "SrcProcessName",
            "QueryName": "DnsQuery",
            "QueryStatus": "EventResultDetails",
            "QueryResults": "DnsResponseName",
            "Protocol": "NetworkProtocol",
            "c-useragent": "HttpUserAgent",
            "userAgent": "HttpUserAgent",
            "Category": "UrlCategory",
            "Status": "EventResultDetails",
            "Product": "EventProduct",
            "Company": "EventVendor",
        },
        "imFileEvent": {
            "SourceIP": "SrcIpAddr",
            "DestinationIP": "DstIpAddr",
            "SourceHostname": "SrcHostname",
            "DestinationHostname": "DstHostname",
            "SourcePort": "SrcPortNumber",
            "User": "ActorUsername",
            "TargetFilename": "TargetFileName",
            "Image": "TargetFilePath",
            "ParentImage": "ActingProcessName",
            "CommandLine": "ActingProcessCommandLine",
            "ParentCommandLine": "ActingProcessCommandLine",
            "ProcessName": "ActingProcessName",
            "ProcessId": "ActingProcessId",
            "ParentProcessName": "ActingProcessName",
            "ParentProcessId": "ActingProcessId",
            "LogonId": "ActorSessionId",
            "TargetObject": "TargetFilePath",
            "Details": "TargetFilePath",
            "SubjectUserName": "ActorUsername",
            "ObjectName": "TargetFilePath",
            "OldFilePath": "SrcFilePath",
            "NewFilePath": "TargetFilePath",
            "OldFileName": "SrcFileName",
            "NewFileName": "TargetFileName",
            "c-uri": "TargetUrl",
            "c-useragent": "HttpUserAgent",
            "cs-method": "NetworkApplicationProtocol",
            "userAgent": "HttpUserAgent",
            "Category": "ThreatCategory",
            "OperationName": "EventType",
            "ProcessGuid": "ActingProcessGuid",
            "CreationUtcTime": "TargetFileCreationTime",
        },
        "imNetworkSession": {
            "SourceIP": "SrcIpAddr",
            "DestinationIP": "DstIpAddr",
            "DestinationIp": "DstIpAddr",
            "SourceHostname": "SrcHostname",
            "DestinationHostname": "DstHostname",
            "SourcePort": "SrcPortNumber",
            "DestinationPort": "DstPortNumber",
            "SourceMAC": "SrcMacAddr",
            "DestinationMAC": "DstMacAddr",
            "Protocol": "NetworkProtocol",
            "NetworkProtocol": "NetworkApplicationProtocol",
            "User": ["SrcUsername", "DstUsername"],
            "Image": ["SrcProcessName", "DstProcessName"],
            "ProcessName": ["SrcProcessName", "DstProcessName"],
            "ProcessId": ["SrcProcessId", "DstProcessId"],
            "ProcessGuid": ["SrcProcessGuid", "DstProcessGuid"],
            "LogonId": ["SrcUserId", "DstUserId"],
            "SourceUserName": "SrcUsername",
            "DestinationUserName": "DstUsername",
            "SourceImage": "SrcProcessName",
            "DestinationImage": "DstProcessName",
            "SourceProcessGUID": "SrcProcessGuid",
            "DestinationProcessGUID": "DstProcessGuid",
            "SourceProcessId": "SrcProcessId",
            "DestinationProcessId": "DstProcessId",
            "SourceThreadId": "SrcProcessId",
            "DestinationThreadId": "DstProcessId",
            "SourceIsIpv6": "NetworkProtocolVersion",
            "DestinationIsIpv6": "NetworkProtocolVersion",
            "Initiated": "NetworkDirection",
            "SourcePortName": "SrcAppName",
            "DestinationPortName": "DstAppName",
            "State": "EventSubType",
            "IpProtocol": "NetworkProtocol",
            "BytesReceived": "DstBytes",
            "BytesSent": "SrcBytes",
            "PacketsReceived": "DstPackets",
            "PacketsSent": "SrcPackets",
            "c-uri": "TargetUrl",
            "c-useragent": "HttpUserAgent",
            "cs-method": "NetworkApplicationProtocol",
            "cs-version": "NetworkProtocolVersion",
            # NOTE(review): Cookie/Referrer mapped to HttpUserAgent looks like a
            # copy/paste slip (imWebSession maps cs-Referrer to HttpReferrer) — confirm.
            "cs-Cookie": "HttpUserAgent",
            "cs-Referrer": "HttpUserAgent",
            "sc-status": "EventResultDetails",
            "userAgent": "HttpUserAgent",
            "Category": "ThreatCategory",
            "OperationName": "EventType",
            "Action": "DvcAction",
            "RuleName": "NetworkRuleName",
        },
        "imProcessCreate": { # process_creation, Sysmon EventID 1 -> imProcessCreate table
            "Image": "TargetProcessName",
            "ParentImage": ["ParentProcessName", "ActingProcessName"],
            "CommandLine": "TargetProcessCommandLine",
            "ParentCommandLine": "ActingProcessCommandLine",
            "User": "TargetUsername",
            "LogonGuid": "TargetUserSessionGuid",
            "LogonId": "TargetUserSessionId",
            "SourceImage": "ActingProcessName",
            "TargetImage": "TargetProcessName",
            "SourceUser": "ActorUsername",
            "TargetUser": "TargetUsername",
            "SourceProcessId": "ActingProcessId",
            "TargetProcessId": "TargetProcessId",
            "SourceProcessGUID": "ActingProcessGuid",
            "TargetProcessGUID": "TargetProcessGuid",
            "ProcessId": "TargetProcessId",
            "ProcessGuid": "TargetProcessGuid",
            "ParentProcessId": ["ParentProcessId", "ActingProcessId"],
            "ParentProcessGuid": ["ParentProcessGuid", "ActingProcessGuid"],
            "ParentUser": "ActorUsername",
            "IntegrityLevel": "TargetProcessIntegrityLevel",
            "ParentProcessName": "ParentProcessName",
            "CurrentDirectory": "TargetProcessCurrentDirectory",
            "OriginalFileName": ["TargetProcessFileOriginalName", "TargetProcessFilename"],
            "Description": "TargetProcessFileDescription",
            "Product": "TargetProcessFileProduct",
            "Company": "TargetProcessFileCompany",
            "FileVersion": "TargetProcessFileVersion",
            "GrantedAccess": "TargetProcessTokenElevation",
            "CallTrace": "TargetProcessInjectedAddress",
            "ParentIntegrityLevel": "ParentProcessIntegrityLevel",
            "TerminalSessionId": "TargetUserSessionId",
            "sha1": "TargetProcessSHA1",
            "sha256": "TargetProcessSHA256",
            "md5": "TargetProcessMD5",
            "ProcessVersionInfoOriginalFileName": "TargetProcessFileVersion",
            "ProcessVersionInfoFileDescription": "TargetProcessFileDescription",
            "ProcessIntegrityLevel": "TargetProcessIntegrityLevel",
            "InitiatingProcessFolderPath": "ActingProcessName",
            "InitiatingProcessCommandLine": "ActingProcessCommandLine",
        },
        "imRegistry": {
            "Image": "ActingProcessName",
            "ParentImage": "ParentProcessName",
            "User": "ActorUsername",
            "TargetObject": "RegistryKey",
            "Details": "RegistryValueData",
            "EventType": "EventType",
            "ProcessId": "ActingProcessId",
            "ProcessGuid": "ActingProcessGuid",
            "ParentProcessId": "ParentProcessId",
            "ParentProcessGuid": "ParentProcessGuid",
            "ObjectName": "RegistryKey",
            "ObjectValueName": "RegistryValue",
            "ObjectType": "RegistryValueType",
            "ObjectValue": "RegistryValueData",
            "OldName": "RegistryPreviousKey",
            "NewName": "RegistryKey",
            "OldValueType": "RegistryPreviousValueType",
            "NewValueType": "RegistryValueType",
            "OldValue": "RegistryPreviousValueData",
            "NewValue": "RegistryValueData",
            "ProcessName": "ActingProcessName",
            "SubjectUserName": "ActorUsername",
            "SubjectUserSid": "ActorUserId",
            "SubjectDomainName": "ActorScope",
            "SubjectLogonId": "ActorSessionId",
        },
        "imWebSession": {
            "c-uri": "Url",
            "c-uri-query": "Url",
            "c-useragent": "HttpUserAgent",
            "cs-method": "HttpRequestMethod",
            "cs-version": "HttpVersion",
            "cs-host": "HttpHost",
            "cs-Referrer": "HttpReferrer",
            "sc-status": "HttpStatusCode",
            "cs-User-Agent": "HttpUserAgent",
            "r-dns": "HttpHost",
            "request": "Url",
            "request_body": "Url",
            "request_method": "HttpRequestMethod",
            "request_url": "Url",
            "request_url_query": "Url",
            "response_status_code": "HttpStatusCode",
            "url_category": "UrlCategory",
            "url_original": "UrlOriginal",
            "http_request_time": "HttpRequestTime",
            "http_response_time": "HttpResponseTime",
            "http_content_type": "HttpContentType",
            "http_user_agent": "HttpUserAgent",
            "http_referrer": "HttpReferrer",
            "x_forwarded_for": "HttpRequestXff",
            "file_name": "FileName",
            "file_hash": ["FileMD5", "FileSHA1", "FileSHA256", "FileSHA512"],
            "file_size": "FileSize",
            "file_type": "FileContentType",
        },
    },
    generic_mappings={
        "EventID": "EventOriginalType",
        "EventType": "EventType",
        "Product": "EventProduct",
        "Vendor": "EventVendor",
        "DeviceName": "DvcHostname",
        "DeviceHostname": "DvcHostname",
        "Computer": "DvcHostname",
        "Hostname": "DvcHostname",
        "IpAddress": "DvcIpAddr",
        "SourceSystem": "EventProduct",
        "TimeGenerated": "EventStartTime",
        "ProcessName": "ActingProcessName",
    },
)
369 |
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/schema.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 | from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings
4 |
5 |
@dataclass
class SentinelASIMSchema(BaseSchema):
    """Sentinel ASIM table schema; all behavior is inherited from BaseSchema."""

    pass
9 |
10 |
@dataclass
class SentinelASIMFieldMappings(FieldMappings):
    """Sentinel ASIM field-mapping container; all behavior is inherited from FieldMappings."""

    pass
14 |
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/sentinelasim.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sigma.pipelines.kusto_common.postprocessing import (
4 | PrependQueryTablePostprocessingItem,
5 | )
6 | from sigma.processing.conditions import (
7 | DetectionItemProcessingItemAppliedCondition,
8 | ExcludeFieldCondition,
9 | IncludeFieldCondition,
10 | LogsourceCondition,
11 | RuleProcessingItemAppliedCondition,
12 | RuleProcessingStateCondition,
13 | )
14 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline
15 | from sigma.processing.transformations import (
16 | DropDetectionItemTransformation,
17 | ReplaceStringTransformation,
18 | RuleFailureTransformation,
19 | )
20 |
21 | from ..kusto_common.errors import InvalidFieldTransformation
22 | from ..kusto_common.schema import create_schema
23 | from ..kusto_common.transformations import (
24 | DynamicFieldMappingTransformation,
25 | GenericFieldMappingTransformation,
26 | RegistryActionTypeValueTransformation,
27 | SetQueryTableStateTransformation,
28 | )
29 | from .mappings import (
30 | CATEGORY_TO_TABLE_MAPPINGS,
31 | EVENTID_CATEGORY_TO_TABLE_MAPPINGS,
32 | SENTINEL_ASIM_FIELD_MAPPINGS,
33 | )
34 | from .schema import SentinelASIMSchema
35 | from .tables import SENTINEL_ASIM_TABLES
36 | from .transformations import (
37 | FileEventHashesValuesTransformation,
38 | ProcessCreateHashesValuesTransformation,
39 | WebSessionHashesValuesTransformation,
40 | )
41 |
# Build the in-memory schema (table -> fields) from the generated table definitions.
SENTINEL_ASIM_SCHEMA = create_schema(SentinelASIMSchema, SENTINEL_ASIM_TABLES)

# Drop EventID field
# ASIM tables do not carry Windows EventID/EventCode/ObjectType columns, so these
# detection items are removed (after the table has been chosen from them upstream).
drop_eventid_proc_item = ProcessingItem(
    identifier="sentinel_asim_drop_eventid",
    transformation=DropDetectionItemTransformation(),
    field_name_conditions=[IncludeFieldCondition(["EventID", "EventCode", "ObjectType"])],
)

## Fieldmappings
# Per-table field renames; the target table is read from the rule's processing state.
fieldmappings_proc_item = ProcessingItem(
    identifier="sentinel_asim_table_fieldmappings",
    transformation=DynamicFieldMappingTransformation(SENTINEL_ASIM_FIELD_MAPPINGS),
)

## Generic Field Mappings, keep this last
## Exclude any fields already mapped, e.g. if a table mapping has been applied.
# This will fix the case where ProcessId is usually mapped to InitiatingProcessId, EXCEPT for the DeviceProcessEvent table where it stays as ProcessId.
# So we can map ProcessId to ProcessId in the DeviceProcessEvents table mapping, and prevent the generic mapping to InitiatingProcessId from being applied
# by adding a detection item condition that the table field mappings have been applied

generic_field_mappings_proc_item = ProcessingItem(
    identifier="sentinel_asim_generic_fieldmappings",
    transformation=GenericFieldMappingTransformation(SENTINEL_ASIM_FIELD_MAPPINGS),
    # Negated condition: only apply the generic mapping to items the table mapping did NOT touch.
    detection_item_conditions=[DetectionItemProcessingItemAppliedCondition("sentinel_asim_table_fieldmappings")],
    detection_item_condition_linking=any,
    detection_item_condition_negation=True,
)
70 |
71 |
## Field Value Replacements ProcessingItems
replacement_proc_items = [
    # Sysmon uses abbreviations in RegistryKey values, replace with full key names as the DeviceRegistryEvents schema
    # expects them to be
    # Note: Ensure this comes AFTER field mapping renames, as we're specifying DeviceRegistryEvent fields
    #
    # Do this one first, or else the HKLM only one will replace HKLM and mess up the regex
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_currentcontrolset",
        transformation=ReplaceStringTransformation(
            regex=r"(?i)(^HKLM\\SYSTEM\\CurrentControlSet)",
            # NOTE(review): this yields "...\SYSTEM\CurrentControlSet001"; the canonical resolved
            # hive path is "ControlSet001" — confirm intended output before changing.
            replacement=r"HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet001",
        ),
        # Fix: was ["RegistryKey", "PreviousRegistryKey"]. The ASIM RegistryEvent schema and the
        # three sibling registry items below all use "RegistryPreviousKey", so the old name never
        # matched and previous-key values were left untransformed by this item.
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hklm",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKLM)", replacement=r"HKEY_LOCAL_MACHINE"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hku",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKU)", replacement=r"HKEY_USERS"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hkcr",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    # Convert Sysmon-style registry EventType values to ASIM registry event types.
    ProcessingItem(
        identifier="sentinel_asim_registry_actiontype_value",
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["EventType"])],
    ),
    # Processing item to transform the Hashes field in the SecurityEvent table to get rid of the hash algorithm prefix in each value
    ProcessingItem(
        identifier="sentinel_asim_processcreate_hashes_field_values",
        transformation=ProcessCreateHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imProcessCreate")],
    ),
    ProcessingItem(
        identifier="sentinel_asim_fileevent_hashes_field_values",
        transformation=FileEventHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imFileEvent")],
    ),
    ProcessingItem(
        identifier="sentinel_asim_webrequest_hashes_field_values",
        transformation=WebSessionHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imWebSession")],
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="sentinel_asim_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]
134 |
# Exceptions/Errors ProcessingItems
# Catch-all for when the query table is not set, meaning the rule could not be mapped to a table or the table name was not set
rule_error_proc_items = [
    # Category Not Supported or Query Table Not Set
    ProcessingItem(
        identifier="sentinel_asim_unsupported_rule_category_or_missing_query_table",
        transformation=RuleFailureTransformation(
            "Rule category not yet supported by the Sentinel ASIM pipeline or query_table is not set."
        ),
        # Fires only after table selection has run AND left query_table unset (linked with `all`).
        rule_conditions=[
            RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
]
151 |
152 |
def get_valid_fields(table_name):
    """Return every field name accepted for *table_name*: the table's own schema
    columns, its table-specific mapped source fields, the generic mapped fields,
    and the special-cased 'Hashes' field."""
    schema_fields = list(SENTINEL_ASIM_SCHEMA.tables[table_name].fields.keys())
    table_mapped = list(SENTINEL_ASIM_FIELD_MAPPINGS.table_mappings.get(table_name, {}).keys())
    generic_mapped = list(SENTINEL_ASIM_FIELD_MAPPINGS.generic_mappings.keys())
    return schema_fields + table_mapped + generic_mapped + ["Hashes"]
160 |
161 |
# One InvalidFieldTransformation item per known table: fails conversion with a helpful
# message listing the valid fields when a rule bound to that table uses an unknown field.
field_error_proc_items = []

for table_name in SENTINEL_ASIM_SCHEMA.tables.keys():
    valid_fields = get_valid_fields(table_name)

    field_error_proc_items.append(
        ProcessingItem(
            identifier=f"sentinel_asim_unsupported_fields_{table_name}",
            transformation=InvalidFieldTransformation(
                f"Please use valid fields for the {table_name} table, or the following fields that have fieldmappings in this "
                f"pipeline:\n{', '.join(sorted(set(valid_fields)))}"
            ),
            field_name_conditions=[ExcludeFieldCondition(fields=valid_fields)],
            # Only applies after table selection ran and chose exactly this table.
            rule_conditions=[
                RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
                RuleProcessingStateCondition("query_table", table_name),  # type: ignore
            ],
            rule_condition_linking=all,
        )
    )

# Add a catch-all error for custom table names
# NOTE(review): condition matches query_table == None, same as the rule-level error above —
# presumably intended for user-supplied custom tables; confirm it is reachable before the
# rule_error item fails the rule.
field_error_proc_items.append(
    ProcessingItem(
        identifier="sentinel_asim_unsupported_fields_custom",
        transformation=InvalidFieldTransformation(
            "Invalid field name for the custom table. Please ensure you're using valid fields for your custom table."
        ),
        field_name_conditions=[
            ExcludeFieldCondition(fields=list(SENTINEL_ASIM_FIELD_MAPPINGS.generic_mappings.keys()) + ["Hashes"])
        ],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
)
200 |
201 |
def sentinel_asim_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """Build the processing pipeline that maps generic Sigma log sources to
    Microsoft Sentinel ASIM tables and fields for the Kusto Query Language backend.

    :param transform_parent_image: Accepted for interface parity with the other Kusto
        pipelines; not referenced by any processing item in this function — TODO confirm
        intended behavior.
    :type transform_parent_image: Optional[bool]
    :param query_table: If specified, the table name will be used in the finalizer, otherwise the table name will be selected based on the category of the rule.
    :type query_table: Optional[str]

    :return: ProcessingPipeline for Microsoft Sentinel ASIM
    :rtype: ProcessingPipeline
    """

    # Table selection must run first: everything downstream keys off the
    # "query_table" processing state it sets.
    items = [
        ProcessingItem(
            identifier="sentinel_asim_set_query_table",
            transformation=SetQueryTableStateTransformation(
                query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
            ),
        ),
        drop_eventid_proc_item,
        fieldmappings_proc_item,
        generic_field_mappings_proc_item,
    ]
    items.extend(replacement_proc_items)
    items.extend(rule_error_proc_items)
    items.extend(field_error_proc_items)

    return ProcessingPipeline(
        name="Generic Log Sources to Sentinel ASIM tables and fields",
        priority=10,
        items=items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )
236 |
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/transformations.py:
--------------------------------------------------------------------------------
1 | from ..kusto_common.transformations import BaseHashesValuesTransformation
2 |
3 |
class ProcessCreateHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in imProcessCreate table to get rid of the hash algorithm prefix in each value.
    """

    def __init__(self):
        # IMPHASH is accepted here in addition to the standard digests; field_prefix
        # presumably prefixes the per-algorithm target fields (e.g. TargetProcessMD5) —
        # see BaseHashesValuesTransformation for the exact contract.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512", "IMPHASH"], field_prefix="TargetProcess")
11 |
12 |
class FileEventHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in imFileEvent table to get rid of the hash algorithm prefix in each value.
    """

    def __init__(self):
        # Standard digests only (no IMPHASH, unlike imProcessCreate); fields are
        # presumably prefixed with "TargetFile" (e.g. TargetFileSHA256).
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512"], field_prefix="TargetFile")
20 |
21 |
class WebSessionHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in imWebSession table to get rid of the hash algorithm prefix in each value.
    """

    def __init__(self):
        # Fields are presumably prefixed with "File" (e.g. FileMD5), matching the
        # imWebSession/WebSession schema's file hash columns.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512"], field_prefix="File")
29 |
--------------------------------------------------------------------------------
/tests/test_backend_kusto.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from sigma.backends.kusto import KustoBackend
4 | from sigma.collection import SigmaCollection
5 | from sigma.pipelines.microsoft365defender import microsoft_365_defender_pipeline
6 |
7 |
@pytest.fixture
def microsoft365defender_backend():
    """Kusto backend preconfigured with the Microsoft 365 Defender pipeline."""
    pipeline = microsoft_365_defender_pipeline()
    return KustoBackend(processing_pipeline=pipeline)
11 |
12 |
@pytest.fixture
def kusto_backend_no_pipeline():
    """Bare Kusto backend with no processing pipeline attached."""
    backend = KustoBackend()
    return backend
16 |
17 |
def test_kusto_and_expression(microsoft365defender_backend: KustoBackend):
    """Two fields in one selection are joined with `and` inside a single `where` clause."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel:
                        CommandLine: valueA
                        User: valueB
                    condition: sel
            """
            )
        )
        == ['DeviceProcessEvents\n| where ProcessCommandLine =~ "valueA" and AccountName =~ "valueB"']
    )
38 |
39 |
def test_kusto_or_expression(microsoft365defender_backend: KustoBackend):
    """Selections combined via `1 of sel*` render as an `or` expression."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel1:
                        CommandLine: valueA
                    sel2:
                        User: valueB
                    condition: 1 of sel*
            """
            )
        )
        == ['DeviceProcessEvents\n| where ProcessCommandLine =~ "valueA" or AccountName =~ "valueB"']
    )
61 |
62 |
def test_kusto_and_or_expression(microsoft365defender_backend: KustoBackend):
    """List-valued fields collapse into parenthesized `in~` groups AND-ed across fields."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel:
                        CommandLine:
                            - valueA1
                            - valueA2
                        ProcessId:
                            - valueB1
                            - valueB2
                    condition: sel
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where (ProcessCommandLine in~ ("valueA1", "valueA2")) and '
            '(ProcessId in~ ("valueB1", "valueB2"))'
        ]
    )
90 |
91 |
def test_kusto_or_and_expression(microsoft365defender_backend: KustoBackend):
    """Each selection's fields AND together; the two selections OR together."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel1:
                        CommandLine: valueA1
                        ProcessId: valueB1
                    sel2:
                        CommandLine: valueA2
                        ProcessId: valueB2
                    condition: 1 of sel*
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where (ProcessCommandLine =~ "valueA1" and ProcessId =~ "valueB1") or '
            '(ProcessCommandLine =~ "valueA2" and ProcessId =~ "valueB2")'
        ]
    )
118 |
119 |
def test_kusto_in_expression(microsoft365defender_backend: KustoBackend):
    """Plain values collapse into `in~`; a trailing-wildcard value is split out as `startswith`."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel:
                        CommandLine:
                            - valueA
                            - valueB
                            - valueC*
                    condition: sel
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where ProcessCommandLine in~ ("valueA", "valueB") or '
            'ProcessCommandLine startswith "valueC"'
        ]
    )
145 |
146 |
def test_kusto_regex_query(microsoft365defender_backend: KustoBackend):
    """The |re modifier renders as KQL `matches regex`."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel:
                        CommandLine|re: foo.*bar
                        ProcessId: foo
                    condition: sel
            """
            )
        )
        == ['DeviceProcessEvents\n| where ProcessCommandLine matches regex "foo.*bar" and ProcessId =~ "foo"']
    )
167 |
168 |
def test_kusto_cidr_query(microsoft365defender_backend: KustoBackend):
    """The |cidr modifier renders as ipv4_is_in_range() on the mapped field."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: network_connection
                    product: windows
                detection:
                    sel:
                        SourceIp|cidr: 192.168.0.0/16
                    condition: sel
            """
            )
        )
        == ['DeviceNetworkEvents\n| where ipv4_is_in_range(LocalIP, "192.168.0.0/16")']
    )
188 |
189 |
def test_kusto_negation_basic(microsoft365defender_backend: KustoBackend):
    """`selection and not filter` wraps the filter expression in not(...)."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
                title: Test
                status: test
                logsource:
                    product: windows
                    category: process_creation
                detection:
                    selection:
                        Image:
                            - '*\process.exe'
                        CommandLine:
                            - 'this'
                    filter:
                        CommandLine:
                            - 'notthis'
                    condition: selection and not filter
                """
            )
        )
        == [
            'DeviceProcessEvents\n| where (FolderPath endswith "\\\\process.exe" and '
            'ProcessCommandLine =~ "this") and '
            '(not(ProcessCommandLine =~ "notthis"))'
        ]
    )
219 |
220 |
def test_kusto_negation_contains(microsoft365defender_backend: KustoBackend):
    """Wildcard-wrapped values render as `contains`, including inside a negated filter."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
                title: Test
                status: test
                logsource:
                    product: windows
                    category: process_creation
                detection:
                    selection:
                        Image:
                            - '*\process.exe'
                        CommandLine:
                            - '*this*'
                    filter:
                        CommandLine:
                            - '*notthis*'
                    condition: selection and not filter
                """
            )
        )
        == [
            'DeviceProcessEvents\n| where (FolderPath endswith "\\\\process.exe" and '
            'ProcessCommandLine contains "this") and '
            '(not(ProcessCommandLine contains "notthis"))'
        ]
    )
250 |
251 |
def test_kusto_grouping(microsoft365defender_backend: KustoBackend):
    """Multiple multi-valued fields produce parenthesized OR groups joined by `and`."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
            title: Net connection logic test
            status: test
            logsource:
                category: network_connection
                product: windows
            detection:
                selection:
                    Image:
                        - '*\powershell.exe'
                        - '*\pwsh.exe'
                    DestinationHostname:
                        - '*pastebin.com*'
                        - '*anothersite.com*'
                condition: selection
            """
            )
        )
        == [
            'DeviceNetworkEvents\n| where (InitiatingProcessFolderPath endswith "\\\\powershell.exe" or '
            'InitiatingProcessFolderPath endswith "\\\\pwsh.exe") and (RemoteUrl contains '
            '"pastebin.com" or RemoteUrl contains "anothersite.com")'
        ]
    )
280 |
281 |
def test_kusto_escape_cmdline_slash(microsoft365defender_backend: KustoBackend):
    """A Sigma-escaped wildcard (\\*) in a contains value renders as a literal asterisk."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
            title: Delete All Scheduled Tasks
            id: 220457c1-1c9f-4c2e-afe6-9598926222c1
            status: test
            description: Detects the usage of schtasks with the delete flag and the asterisk symbol to delete all tasks from the schedule of the local computer, including tasks scheduled by other users.
            references:
                - https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/schtasks-delete
            author: Nasreddine Bencherchali (Nextron Systems)
            date: 2022-09-09
            tags:
                - attack.impact
                - attack.t1489
            logsource:
                category: process_creation
                product: windows
            detection:
                selection:
                    Image|endswith: '\schtasks.exe'
                    CommandLine|contains|all:
                        - ' /delete '
                        - '/tn \*'
                        - ' /f'
                condition: selection
            falsepositives:
                - Unlikely
            level: high
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where FolderPath endswith "\\\\schtasks.exe" and '
            '(ProcessCommandLine contains " /delete " and '
            'ProcessCommandLine contains "/tn *" and '
            'ProcessCommandLine contains " /f")'
        ]
    )
322 |
323 |
def test_kusto_cmdline_filters(microsoft365defender_backend: KustoBackend):
    """Complex rule with optional filters: embedded quotes and backslashes escape correctly."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
            title: New Firewall Rule Added Via Netsh.EXE
            id: cd5cfd80-aa5f-44c0-9c20-108c4ae12e3c
            status: test
            description: Detects the addition of a new rule to the Windows firewall via netsh
            references:
                - https://www.operationblockbuster.com/wp-content/uploads/2016/02/Operation-Blockbuster-RAT-and-Staging-Report.pdf
            author: Markus Neis, Sander Wiebing
            date: 2019-01-29
            modified: 2023-02-10
            tags:
                - attack.defense_evasion
                - attack.t1562.004
                - attack.s0246
            logsource:
                category: process_creation
                product: windows
            detection:
                selection_img:
                    - Image|endswith: '\netsh.exe'
                    - OriginalFileName: 'netsh.exe'
                selection_cli:
                    CommandLine|contains|all:
                        - ' firewall '
                        - ' add '
                filter_optional_dropbox:
                    CommandLine|contains:
                        - 'advfirewall firewall add rule name=Dropbox dir=in action=allow "program=?:\Program Files (x86)\Dropbox\Client\Dropbox.exe" enable=yes profile=Any'
                        - 'advfirewall firewall add rule name=Dropbox dir=in action=allow "program=?:\Program Files\Dropbox\Client\Dropbox.exe" enable=yes profile=Any'
                condition: all of selection_* and not 1 of filter_optional_*
            falsepositives:
                - Legitimate administration activity
                - Software installations
            level: medium
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where ((FolderPath endswith "\\\\netsh.exe" or '
            'ProcessVersionInfoOriginalFileName =~ "netsh.exe") and '
            '(ProcessCommandLine contains " firewall " and ProcessCommandLine contains " add ")) and '
            '(not(((ProcessCommandLine contains "advfirewall firewall add rule name=Dropbox dir=in action=allow '
            '\\"program=" and ProcessCommandLine contains ":\\\\Program Files (x86)\\\\Dropbox\\\\Client\\\\Dropbox.exe\\" '
            'enable=yes profile=Any") or (ProcessCommandLine contains "advfirewall firewall add rule name=Dropbox dir=in '
            'action=allow \\"program=" and ProcessCommandLine contains ":\\\\Program Files\\\\Dropbox\\\\Client\\\\Dropbox.exe\\" '
            'enable=yes profile=Any"))))'
        ]
    )
376 |
377 |
def test_kusto_sigmanumber_conversion(kusto_backend_no_pipeline: KustoBackend):
    """A numeric Sigma value renders unquoted with `==` when no pipeline is applied."""
    assert (
        kusto_backend_no_pipeline.convert(
            SigmaCollection.from_yaml(
                """
            title: Test
            status: test
            logsource:
                product: windows
            detection:
                sel:
                    EventID: 1
                condition: sel
            """
            )
        )
        == ["EventID == 1"]
    )
396 |
397 |
def test_kusto_sigmanumber_conversion_mixed_types(kusto_backend_no_pipeline: KustoBackend):
    """A value list mixing a number and strings stays string-compared via `in~` ("0" is quoted)."""
    assert (
        kusto_backend_no_pipeline.convert(
            SigmaCollection.from_yaml(
                r"""
            title: ETW Logging Disabled In .NET Processes - Sysmon Registry
            id: bf4fc428-dcc3-4bbd-99fe-2422aeee2544
            related:
                - id: a4c90ea1-2634-4ca0-adbb-35eae169b6fc
                  type: similar
            status: test
            description: Potential adversaries stopping ETW providers recording loaded .NET assemblies.
            references:
                - https://twitter.com/_xpn_/status/1268712093928378368
                - https://social.msdn.microsoft.com/Forums/vstudio/en-US/0878832e-39d7-4eaf-8e16-a729c4c40975/what-can-i-use-e13c0d23ccbc4e12931bd9cc2eee27e4-for?forum=clr
                - https://github.com/dotnet/runtime/blob/ee2355c801d892f2894b0f7b14a20e6cc50e0e54/docs/design/coreclr/jit/viewing-jit-dumps.md#setting-configuration-variables
                - https://github.com/dotnet/runtime/blob/f62e93416a1799aecc6b0947adad55a0d9870732/src/coreclr/src/inc/clrconfigvalues.h#L35-L38
                - https://github.com/dotnet/runtime/blob/7abe42dc1123722ed385218268bb9fe04556e3d3/src/coreclr/src/inc/clrconfig.h#L33-L39
                - https://github.com/dotnet/runtime/search?p=1&q=COMPlus_&unscoped_q=COMPlus_
                - https://bunnyinside.com/?term=f71e8cb9c76a
                - http://managed670.rssing.com/chan-5590147/all_p1.html
                - https://github.com/dotnet/runtime/blob/4f9ae42d861fcb4be2fcd5d3d55d5f227d30e723/docs/coding-guidelines/clr-jit-coding-conventions.md#1412-disabling-code
                - https://blog.xpnsec.com/hiding-your-dotnet-complus-etwenabled/
                - https://i.blackhat.com/EU-21/Wednesday/EU-21-Teodorescu-Veni-No-Vidi-No-Vici-Attacks-On-ETW-Blind-EDRs.pdf
            author: Roberto Rodriguez (Cyb3rWard0g), OTR (Open Threat Research)
            date: 2020-06-05
            modified: 2023-08-17
            tags:
                - attack.defense-evasion
                - attack.t1112
                - attack.t1562
            logsource:
                product: windows
                category: registry_set
            detection:
                selection_etw_enabled:
                    TargetObject|endswith: 'SOFTWARE\Microsoft\.NETFramework\ETWEnabled'
                    Details: 'DWORD (0x00000000)'
                selection_complus:
                    TargetObject|endswith:
                        - '\COMPlus_ETWEnabled'
                        - '\COMPlus_ETWFlags'
                    Details:
                        - 0 # For REG_SZ type
                        - 'DWORD (0x00000000)'
                condition: 1 of selection_*
            falsepositives:
                - Unknown
            level: high
            """
            )
        )
        == [
            '(TargetObject endswith "SOFTWARE\\\\Microsoft\\\\.NETFramework\\\\ETWEnabled" and Details =~ "DWORD (0x00000000)") or ((TargetObject endswith "\\\\COMPlus_ETWEnabled" or '
            'TargetObject endswith "\\\\COMPlus_ETWFlags") and (Details in~ ("0", "DWORD (0x00000000)")))'
        ]
    )
455 |
--------------------------------------------------------------------------------
/tests/test_pipelines_azuremonitor.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from sigma.backends.kusto import KustoBackend
4 | from sigma.collection import SigmaCollection
5 | from sigma.exceptions import SigmaTransformationError
6 | from sigma.pipelines.azuremonitor import azure_monitor_pipeline
7 | from sigma.rule import SigmaRule
8 |
9 |
@pytest.fixture
def azure_backend():
    """Kusto backend wired with the Azure Monitor processing pipeline."""
    pipeline = azure_monitor_pipeline()
    return KustoBackend(processing_pipeline=pipeline)
13 |
14 |
def test_azure_monitor_process_creation_field_mapping(azure_backend):
    """process_creation fields map to SecurityEvent columns (Image->NewProcessName, etc.)."""
    yaml_rule = """
    title: Test Process Creation
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Image: C:\\Windows\\System32\\cmd.exe
            CommandLine: whoami
            User: SYSTEM
            ProcessId: 1234
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe" and CommandLine =~ "whoami" and SubjectUserName =~ "SYSTEM" and NewProcessId == 1234'
    ]

    # Both the collection and single-rule conversion paths must agree.
    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
36 |
37 |
def test_azure_monitor_network_connection_field_mapping(azure_backend):
    """network_connection fields keep their names in the SecurityEvent table; ports stay numeric."""
    yaml_rule = """
    title: Test Network Connection
    status: test
    logsource:
        category: network_connection
        product: windows
    detection:
        sel:
            DestinationIp: 8.8.8.8
            DestinationPort: 53
            SourcePort: 12345
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where DestinationIp =~ "8.8.8.8" and DestinationPort == 53 and SourcePort == 12345'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
58 |
59 |
def test_azure_monitor_registry_event_field_mapping(azure_backend):
    """registry_event maps TargetObject->ObjectName and expands the HKLM abbreviation."""
    yaml_rule = """
    title: Test Registry Event
    status: test
    logsource:
        category: registry_event
        product: windows
    detection:
        sel:
            EventID: 13
            TargetObject: HKLM\\Software\\Microsoft\\Windows\\CurrentVersion\\Run
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where EventID == 13 and ObjectName =~ "HKEY_LOCAL_MACHINE\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run"'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
79 |
80 |
def test_azure_monitor_file_event_field_mapping(azure_backend):
    """file_event maps TargetFilename->ObjectName and Image->NewProcessName."""
    yaml_rule = """
    title: Test File Event
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            TargetFilename: C:\\suspicious\\file.exe
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where ObjectName =~ "C:\\\\suspicious\\\\file.exe" and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
100 |
101 |
def test_azure_monitor_hashes_transformation(azure_backend):
    """Hashes values lose their algo= prefixes and collapse into one FileHash in~ clause."""
    yaml_rule = """
    title: Test Hashes
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Hashes:
                - md5=1234567890abcdef1234567890abcdef
                - sha1=1234567890abcdef1234567890abcdef12345678
                - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where FileHash in~ ("1234567890abcdef1234567890abcdef", "1234567890abcdef1234567890abcdef12345678", "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef")'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
123 |
124 |
def test_azure_monitor_registry_key_replacement(azure_backend):
    """HKLM/HKU/HKCR abbreviations expand to full hive names in ObjectName values."""
    yaml_rule = """
    title: Test Registry Key Replacement
    status: test
    logsource:
        category: registry_event
        product: windows
    detection:
        sel:
            TargetObject:
                - HKLM\\Software\\Microsoft\\Windows\\CurrentVersion\\Run
                - HKU\\Software\\Microsoft\\Windows\\CurrentVersion\\Run
                - HKCR\\Software\\Microsoft\\Windows\\CurrentVersion\\Run
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where ObjectName in~ ("HKEY_LOCAL_MACHINE\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run", "HKEY_USERS\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run", "HKEY_LOCAL_MACHINE\\\\CLASSES\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run")'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
146 |
147 |
def test_azure_monitor_unsupported_category(azure_backend):
    """An unknown rule category raises SigmaTransformationError (no table can be chosen)."""
    yaml_rule = """
    title: Test Unsupported Category
    status: test
    logsource:
        category: unsupported_category
        product: windows
    detection:
        sel:
            Field: value
        condition: sel
    """
    with pytest.raises(SigmaTransformationError, match="Unable to determine table name from rule. "):
        azure_backend.convert(SigmaCollection.from_yaml(yaml_rule))
162 |
163 |
def test_azure_monitor_invalid_field(azure_backend):
    """An unmapped field raises SigmaTransformationError naming the SecurityEvent table."""
    yaml_rule = """
    title: Test Invalid Field
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            InvalidField: value
        condition: sel
    """
    with pytest.raises(
        SigmaTransformationError, match="Invalid SigmaDetectionItem field name encountered.*SecurityEvent"
    ):
        azure_backend.convert(SigmaCollection.from_yaml(yaml_rule))
180 |
181 |
def test_azure_monitor_custom_query_table():
    """Pipeline's query_table argument overrides the category-derived table name."""
    yaml_rule = """
    title: Test Custom Query Table
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            CommandLine: whoami
        condition: sel
    """
    expected_result = ['CustomTable\n| where CommandLine =~ "whoami"']

    custom_backend = KustoBackend(processing_pipeline=azure_monitor_pipeline(query_table="CustomTable"))
    assert custom_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert custom_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
199 |
200 |
def test_azure_monitor_pipeline_custom_table_invalid_category():
    """An unknown category is acceptable when an explicit query_table is set."""
    rule_text = """
    title: Test
    status: test
    logsource:
        product: windows
        category: blah
    detection:
        sel:
            Image: actuallyafileevent.exe
        condition: sel
    """
    expected = ['SecurityEvent\n| where NewProcessName =~ "actuallyafileevent.exe"']

    backend = KustoBackend(processing_pipeline=azure_monitor_pipeline(query_table="SecurityEvent"))
    converted = backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
218 |
219 |
def test_azure_monitor_eventid_mapping(azure_backend):
    """Test that EventID is used to determine table when category is missing"""
    rule_text = """
    title: Test EventID Mapping
    status: test
    logsource:
        product: windows
    detection:
        sel:
            EventID: 1
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # All EventIDs should map to SecurityEvent table
    expected = [
        'SecurityEvent\n| where EventID == 1 and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    converted = azure_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = azure_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
240 |
241 |
def test_azure_monitor_category_precedence(azure_backend):
    """Test that category takes precedence over EventID when both are present"""
    rule_text = """
    title: Test Category Precedence
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            EventID: 1 # Process creation EventID, but should use file_event category
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # Should use SecurityEvent table based on category mapping
    expected = [
        'SecurityEvent\n| where EventID == 1 and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    converted = azure_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = azure_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
263 |
--------------------------------------------------------------------------------
/tests/test_pipelines_sentinelasim.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from sigma.backends.kusto import KustoBackend
4 | from sigma.collection import SigmaCollection
5 | from sigma.exceptions import SigmaTransformationError
6 | from sigma.pipelines.sentinelasim import sentinel_asim_pipeline
7 | from sigma.rule import SigmaRule
8 |
9 |
@pytest.fixture
def asim_backend():
    """Kusto backend wired with the default Sentinel ASIM pipeline."""
    pipeline = sentinel_asim_pipeline()
    return KustoBackend(processing_pipeline=pipeline)
13 |
14 |
def test_sentinel_asim_process_creation_field_mapping(asim_backend):
    """Process-creation fields should map onto imProcessCreate columns."""
    rule_text = """
    title: Test Process Creation
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Image: C:\\Windows\\System32\\cmd.exe
            CommandLine: whoami
            User: SYSTEM
            ProcessId: 1234
        condition: sel
    """
    expected = [
        'imProcessCreate\n| where TargetProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe" and TargetProcessCommandLine =~ "whoami" and TargetUsername =~ "SYSTEM" and TargetProcessId == 1234'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
36 |
37 |
def test_sentinel_asim_network_connection_field_mapping(asim_backend):
    """Network-connection fields should map onto imNetworkSession columns."""
    rule_text = """
    title: Test Network Connection
    status: test
    logsource:
        category: network_connection
        product: windows
    detection:
        sel:
            DestinationIp: 8.8.8.8
            DestinationPort: 53
            Protocol: udp
        condition: sel
    """
    expected = [
        'imNetworkSession\n| where DstIpAddr =~ "8.8.8.8" and DstPortNumber == 53 and NetworkProtocol =~ "udp"'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
58 |
59 |
def test_sentinel_asim_registry_event_field_mapping(asim_backend):
    """Registry fields and value-set events should map onto imRegistry."""
    rule_text = """
    title: Test Registry Event
    status: test
    logsource:
        category: registry_event
        product: windows
    detection:
        sel:
            TargetObject: HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run
            EventType: SetValue
        condition: sel
    """
    expected = [
        'imRegistry\n| where RegistryKey =~ "HKEY_LOCAL_MACHINE\\\\SOFTWARE\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run" and EventType =~ "RegistryValueSet"'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
79 |
80 |
def test_sentinel_asim_custom_table():
    """An explicit query_table should override the category's default table."""
    rule_text = """
    title: Test Custom Table
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Image: malware.exe
        condition: sel
    """
    expected = ['imFileEvent\n| where TargetFilePath =~ "malware.exe"']

    backend = KustoBackend(processing_pipeline=sentinel_asim_pipeline(query_table="imFileEvent"))
    converted = backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
98 |
99 |
def test_sentinel_asim_unsupported_field(asim_backend):
    """A field with no ASIM mapping should raise a transformation error."""
    rule_text = """
    title: Test Unsupported Field
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            UnsupportedField: value
        condition: sel
    """
    collection = SigmaCollection.from_yaml(rule_text)
    with pytest.raises(
        SigmaTransformationError, match="Invalid SigmaDetectionItem field name encountered: UnsupportedField"
    ):
        asim_backend.convert(collection)
116 |
117 |
def test_sentinel_asim_file_event(asim_backend):
    """File-event rules should resolve to the imFileEvent table."""
    rule_text = """
    title: Test File Event
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            Image: C:\\Windows\\explorer.exe
        condition: sel
    """
    expected = ['imFileEvent\n| where TargetFilePath =~ "C:\\\\Windows\\\\explorer.exe"']

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
134 |
135 |
def test_sentinel_asim_pipeline_custom_table_invalid_category():
    """An unknown category is acceptable when an explicit query_table is set."""
    rule_text = """
    title: Test Custom Table
    status: test
    logsource:
        category: blah
        product: windows
    detection:
        sel:
            Image: malware.exe
        condition: sel
    """
    expected = ['imFileEvent\n| where TargetFilePath =~ "malware.exe"']

    backend = KustoBackend(processing_pipeline=sentinel_asim_pipeline(query_table="imFileEvent"))
    converted = backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
153 |
154 |
def test_sentinel_asim_processcreate_hashes_field_values(asim_backend):
    """Each algorithm in a Hashes list should map to its own imProcessCreate column."""
    rule_text = """
    title: Test ProcessCreate Hashes Field Values
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Hashes:
                - md5=1234567890abcdef1234567890abcdef
                - sha1=1234567890abcdef1234567890abcdef12345678
                - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
                - imphash=1234567890abcdef1234567890abcdef
                - sha512=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
        condition: sel
    """
    expected = [
        'imProcessCreate\n| where TargetProcessMD5 =~ "1234567890abcdef1234567890abcdef" or TargetProcessSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or TargetProcessSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" or TargetProcessIMPHASH =~ "1234567890abcdef1234567890abcdef" or TargetProcessSHA512 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
178 |
179 |
def test_sentinel_asim_fileevent_hashes_field_values(asim_backend):
    """Each algorithm in a Hashes list should map to its own imFileEvent column."""
    rule_text = """
    title: Test FileEvent Hashes Field Values
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            Hashes:
                - md5=1234567890abcdef1234567890abcdef
                - sha1=1234567890abcdef1234567890abcdef12345678
                - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
        condition: sel
    """
    expected = [
        'imFileEvent\n| where TargetFileMD5 =~ "1234567890abcdef1234567890abcdef" or TargetFileSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or TargetFileSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
201 |
202 |
def test_sentinel_asim_webrequest_hashes_field_values(asim_backend):
    """Each algorithm in a Hashes list should map to its own imWebSession column."""
    rule_text = """
    title: Test WebRequest Hashes Field Values
    status: test
    logsource:
        category: proxy
        product: windows
    detection:
        sel:
            Hashes:
                - md5=1234567890abcdef1234567890abcdef
                - sha1=1234567890abcdef1234567890abcdef12345678
                - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
        condition: sel
    """
    expected = [
        'imWebSession\n| where FileMD5 =~ "1234567890abcdef1234567890abcdef" or FileSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or FileSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"'
    ]

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
224 |
225 |
def test_sentinel_asim_pipeline_unsupported_rule_type(asim_backend):
    """A rule that resolves to no known ASIM table must raise."""
    rule_text = """
    title: test
    status: test
    logsource:
        category: invalid_category
        product: invalid_product
    detection:
        sel:
            field: whatever
        condition: sel
    """
    collection = SigmaCollection.from_yaml(rule_text)
    with pytest.raises(SigmaTransformationError, match="Unable to determine table name from rule. "):
        asim_backend.convert(collection)
240 |
241 |
def test_sentinel_asim_eventid_mapping(asim_backend):
    """Test that EventID is used to determine table when category is missing"""
    rule_text = """
    title: Test EventID Mapping
    status: test
    logsource:
        product: windows
    detection:
        sel:
            EventID: 1
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # EventID 1 should map to process category -> imProcessCreate table
    expected = ['imProcessCreate\n| where TargetProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"']

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
260 |
261 |
def test_sentinel_asim_category_precedence(asim_backend):
    """Test that category takes precedence over EventID when both are present"""
    rule_text = """
    title: Test Category Precedence
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            EventID: 1 # Process creation EventID, but should use file_event category
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # Should use imFileEvent table based on category, not imProcessCreate from EventID
    expected = ['imFileEvent\n| where TargetFilePath =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"']

    converted = asim_backend.convert(SigmaCollection.from_yaml(rule_text))
    assert converted == expected
    converted_single = asim_backend.convert_rule(SigmaRule.from_yaml(rule_text))
    assert converted_single == expected
281 |
--------------------------------------------------------------------------------
/utils/get_azure_monitor_schema_tables.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import os
3 | import re
4 | from datetime import datetime, timezone
5 | from typing import Dict, List
6 |
7 | import requests
8 | from dotenv import load_dotenv
9 |
# Load environment variables (e.g. GITHUB_API_KEY) from a local .env file.
load_dotenv()

# GitHub API configuration
GITHUB_API_KEY = os.getenv("GITHUB_API_KEY")
BASE_URL = "https://api.github.com/repos/MicrosoftDocs/azure-reference-other/contents/azure-monitor-ref/tables"
HEADERS = {"Accept": "application/vnd.github.v3+json"}
# A token is optional; without one the script warns about rate limiting.
if GITHUB_API_KEY:
    HEADERS["Authorization"] = f"token {GITHUB_API_KEY}"

# Destination of the auto-generated schema module.
OUTPUT_FILE = "sigma/pipelines/azuremonitor/tables.py"
20 |
21 |
def fetch_content(file_name: str = None) -> str:
    """Fetch a file or directory listing from the GitHub contents API.

    :param file_name: Path relative to BASE_URL; when None, the base
        directory listing is fetched.
    :return: Decoded file text for single-file responses, the parsed JSON
        for directory listings, the raw body for non-JSON responses, or
        None when the request fails.
    """
    url = f"{BASE_URL}/{file_name}" if file_name else BASE_URL
    response = requests.get(url, headers=HEADERS)
    if response.ok:
        try:
            json_content = response.json()
        except ValueError:
            # Body is not JSON; hand back the raw text.
            return response.text
        if isinstance(json_content, dict) and "content" in json_content:
            # Single-file response: payload is base64-encoded.
            return base64.b64decode(json_content["content"]).decode("utf-8")
        # Directory listing: reuse the already-parsed JSON instead of
        # calling response.json() a second time (original parsed twice).
        return json_content
    print(f"Failed to retrieve content for {file_name}: {response.reason}")
    return None
39 |
40 |
def extract_table_urls(json_content: dict) -> List[str]:
    """Collect the ``name`` value of every entry in a directory listing."""
    names = []
    for entry in json_content:
        names.append(entry["name"])
    return names
44 |
45 |
def extract_table_schema(content: str, table_name: str = None) -> dict:
    """Extract table schema from markdown content.

    Tries a whitespace-tolerant pipe-table pattern first, then a compact
    fallback pattern, and parses the captured rows into a field mapping.
    """
    table_match = re.search(
        r"\|\s*Column\s*\|\s*Type\s*\|\s*Description\s*\|\n\|[-\s|]*\n((?:\|.*\|$\n?)+)", content, re.MULTILINE
    )
    if not table_match:
        # Fallback: compact header, capture until the next section heading.
        table_match = re.search(
            r"\|Column\|Type\|Description\|[\r\n]+\|---\|---\|---\|[\n\r]+(.*?)(?=\n##|\Z)", content, re.DOTALL
        )
    if not table_match:
        print(f"Field table not found in {table_name}")
        return {}

    fields = {}
    for line in table_match.group(1).strip().split("\n"):
        cells = [cell.strip() for cell in line.strip().strip("|").split("|")]
        if len(cells) < 2:
            continue
        description = cells[2] if len(cells) > 2 else ""
        fields[cells[0]] = {"data_type": cells[1], "description": description}
    if not fields:
        print(f"Table schema could not be parsed from {table_name}")
    return fields
67 |
68 |
def process_table(file_path: str) -> dict:
    """Fetch one markdown table page and return {table_name: schema}."""
    print(f"Processing table: {file_path}")
    content = fetch_content(file_path)
    if not content:
        return {}
    # Try to get table name from header after ---
    name_match = re.search(r"^title:.*-\s*(.+)$", content, re.MULTILINE)
    if not name_match:
        # Try to get table name from top text between ---
        name_match = re.search(r"^ms\.custom\:\s+(.+)", content, re.MULTILINE)
    table_name = name_match.group(1).strip() if name_match else None
    if not table_name:
        print(f"Table name not found in {file_path}")
        return {}
    return {table_name: extract_table_schema(content, table_name)}
85 |
86 |
def write_schema(output_file: str, schema_tables: Dict[str, dict]):
    """Write the schema tables to a Python file.

    Emits an auto-generated module defining AZURE_MONITOR_TABLES, a dict
    mapping table name -> field name -> {"data_type", "description"}.

    :param output_file: Path of the module to (over)write.
    :param schema_tables: Mapping of table name to its parsed field schema.
    """
    with open(output_file, "w") as f:
        f.write("# This file is auto-generated. Do not edit manually.\n")
        f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n")
        f.write("AZURE_MONITOR_TABLES = {\n")
        for table, fields in schema_tables.items():
            f.write(f'    "{table}": {{\n')
            for field, info in fields.items():
                # strip("`") drops markdown code-span backticks around
                # identifiers; repr() quotes/escapes the description safely.
                f.write(
                    f'        "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}}},\n'
                )
            f.write("    },\n")
        f.write("}\n")
101 |
102 |
def get_all_includes_tables() -> dict:
    """Fetch and parse every markdown table page under includes/."""
    listing = fetch_content("includes")
    if not listing:
        return {}
    markdown_files = ["includes/" + name for name in extract_table_urls(listing) if name.endswith(".md")]
    schemas = {}
    for path in markdown_files:
        for table, schema in process_table(path).items():
            if schema:
                schemas[table] = schema
    return schemas
109 |
110 |
def get_all_tables() -> dict:
    """Retrieve all tables from the TOC and process them."""
    listing = fetch_content()
    if not listing:
        return {}
    markdown_files = [name for name in extract_table_urls(listing) if name.endswith(".md")]
    schemas = {}
    for path in markdown_files:
        for table, schema in process_table(path).items():
            if schema:
                schemas[table] = schema
    return schemas
118 |
119 |
if __name__ == "__main__":
    # Without a token, unauthenticated GitHub API calls may be rate limited.
    if not GITHUB_API_KEY:
        print("Warning: GITHUB_API_KEY not set. You may encounter rate limiting.")
    tables = get_all_tables()
    # The includes/ directory holds additional table pages; merge them in
    # (entries from includes/ win on name collisions).
    tables_includes = get_all_includes_tables()
    tables.update(tables_includes)
    write_schema(OUTPUT_FILE, tables)
    print(f"Schema written to {OUTPUT_FILE}")
128 |
--------------------------------------------------------------------------------
/utils/get_microsoft_xdr_schema_tables.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import os
3 | import re
4 | from datetime import datetime, timezone
5 | from typing import Dict, List
6 |
7 | import requests
8 | import yaml
9 | from dotenv import load_dotenv
10 |
# Load environment variables (e.g. GITHUB_API_KEY) from a local .env file.
load_dotenv()

# GitHub API configuration
GITHUB_API_KEY = os.getenv("GITHUB_API_KEY")
BASE_URL = "https://api.github.com/repos/MicrosoftDocs/defender-docs/contents/defender-xdr"
HEADERS = {"Accept": "application/vnd.github.v3+json"}
# A token is optional; without one the script warns about rate limiting.
if GITHUB_API_KEY:
    HEADERS["Authorization"] = f"token {GITHUB_API_KEY}"

# Destination of the auto-generated schema module.
OUTPUT_FILE = "sigma/pipelines/microsoftxdr/tables.py"
21 |
22 |
def fetch_content(file_name: str) -> str:
    """Fetch the file content from GitHub and decode it."""
    response = requests.get(f"{BASE_URL}/{file_name}", headers=HEADERS)
    if not response.ok:
        print(f"Failed to retrieve content for {file_name}: {response.reason}")
        return None
    # Contents-API payloads arrive base64-encoded.
    encoded = response.json()["content"]
    return base64.b64decode(encoded).decode("utf-8")
31 |
32 |
def extract_table_urls(toc_content: str) -> List[str]:
    """Extract table URLs from the TOC.yml file.

    Walks the fixed section path down to the "Data schema" node and
    returns the hrefs of its table entries.

    :param toc_content: Raw TOC.yml text.
    :return: hrefs of the schema table pages.
    :raises LookupError: If an expected TOC section is missing.
    """
    toc_data = yaml.safe_load(toc_content)
    section_items = toc_data[0]["items"]
    for section_name in (
        "Investigate and respond to threats",
        "Search for threats with advanced hunting",
        "Data schema",
    ):
        section = next((item for item in section_items if item.get("name") == section_name), None)
        if section is None:
            # Fail with a clear message instead of the original opaque
            # TypeError (None["items"]) when the docs TOC is reorganized.
            raise LookupError(f"TOC section not found: {section_name!r}")
        section_items = section["items"]
    # NOTE(review): the first two entries are skipped as in the original
    # code — presumably overview/landing pages, not table docs; confirm.
    return [item["href"] for item in section_items[2:] if "href" in item]
46 |
47 |
def extract_table_schema(content: str) -> dict:
    """Extract table schema from markdown content."""
    table_match = re.search(r"\|\s?Column name\s?\|\s?Data type\s?\|\s?Description\s?\|([\s\S]+?)\n\n", content)
    if not table_match:
        return {}

    fields = {}
    rows = table_match.group(1).strip().split("\n")
    # The first captured row is the |---| separator; skip it.
    for row in rows[1:]:
        cells = [cell.strip() for cell in row.strip("|").split("|")]
        if len(cells) == 3:
            fields[cells[0]] = {"data_type": cells[1], "description": cells[2]}
    return fields
60 |
61 |
def process_table(file_path: str) -> dict:
    """Fetch one schema page and return {table_name: schema}."""
    content = fetch_content(file_path)
    if not content:
        return {}

    # The page's first H1 heading carries the table name.
    heading = re.search(r"^# (.+)", content, re.MULTILINE)
    name = heading.group(1) if heading else "Unknown"
    return {name: extract_table_schema(content)}
71 |
72 |
def write_schema(output_file: str, schema_tables: Dict[str, dict]):
    """Write the schema tables to a Python file.

    Emits an auto-generated module defining MICROSOFT_XDR_TABLES, a dict
    mapping table name -> field name -> {"data_type", "description"}.

    :param output_file: Path of the module to (over)write.
    :param schema_tables: Mapping of table name to its parsed field schema.
    """
    with open(output_file, "w") as f:
        f.write("# This file is auto-generated. Do not edit manually.\n")
        f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n")
        f.write("MICROSOFT_XDR_TABLES = {\n")
        for table, fields in schema_tables.items():
            f.write(f'    "{table}": {{\n')
            for field, info in fields.items():
                # strip("`") drops markdown code-span backticks around
                # identifiers; repr() quotes/escapes the description safely.
                f.write(
                    f'        "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}}},\n'
                )
            f.write("    },\n")
        f.write("}\n")
87 |
88 |
def get_all_tables() -> dict:
    """Retrieve all tables from the TOC and process them."""
    toc_content = fetch_content("TOC.yml")
    if not toc_content:
        return {}
    schemas = {}
    for url in extract_table_urls(toc_content):
        schemas.update(process_table(url))
    return schemas
96 |
97 |
if __name__ == "__main__":
    # Without a token, unauthenticated GitHub API calls may be rate limited.
    if not GITHUB_API_KEY:
        print("Warning: GITHUB_API_KEY not set. You may encounter rate limiting.")
    tables = get_all_tables()
    write_schema(OUTPUT_FILE, tables)
    print(f"Schema written to {OUTPUT_FILE}")
104 |
--------------------------------------------------------------------------------
/utils/get_sentinel_asim_schema_tables.py:
--------------------------------------------------------------------------------
1 | import re
2 | from datetime import datetime, timezone
3 | from typing import Dict, List, Optional, Tuple
4 |
5 | import requests
6 | from bs4 import BeautifulSoup
7 |
# Root of the Azure Sentinel documentation; schema pages hang off this URL.
BASE_URL = "https://learn.microsoft.com/en-us/azure/sentinel"
# Destination of the auto-generated schema module.
OUTPUT_FILE = "sigma/pipelines/sentinelasim/tables.py"

# TODO: Fix common field schema not writing to file
12 |
13 |
def get_request(url: str) -> requests.Response:
    """
    Sends a GET request to the specified URL and returns the response.

    :param url: The URL to send the GET request to.
    :return: The response from the GET request.
    :raises requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url)
    # Surface HTTP errors immediately rather than passing bad payloads on.
    response.raise_for_status()
    return response
25 |
26 |
def extract_asim_schema_hrefs(items: List[dict]) -> List[str]:
    """Extracts hrefs for ASIM schemas from the JSON data."""
    reference = next((item for item in items if item.get("toc_title") == "Reference"), None)
    if reference is None:
        return []
    return extract_asim_schemas(reference.get("children", []))
33 |
34 |
def extract_asim_schemas(items: List[dict]) -> List[str]:
    """Finds the ASIM schemas section and returns the relevant hrefs.

    :param items: Child nodes of the "Reference" TOC entry.
    :return: Schema hrefs, or [] when the ASIM section is absent.
    """
    for item in items:
        # .get(..., "") guards against nodes without a toc_title, which
        # previously crashed with AttributeError on None.lower().
        if item.get("toc_title", "").lower() == "advanced security information model (asim)":
            return find_schema_hrefs(item.get("children", []))
    return []
41 |
42 |
def find_schema_hrefs(items: List[dict]) -> List[str]:
    """Extracts the schema hrefs, excluding legacy schemas.

    :param items: Child nodes of the ASIM TOC entry.
    :return: hrefs of every schema page except the legacy network schema.
    """
    hrefs = []
    for item in items:
        # .get(..., "") guards against nodes without a toc_title, which
        # previously crashed with AttributeError on None.lower().
        if item.get("toc_title", "").lower() == "asim schemas":
            for schema in item.get("children", []):
                if schema.get("toc_title") != "Legacy network normalization schema":
                    hrefs.append(schema.get("href"))
    return hrefs
52 |
53 |
def get_sentinel_asim_schema_tables() -> List[str]:
    """Fetches the ASIM schema table hrefs from Azure Sentinel documentation.

    :return: hrefs of the schema pages under the documentation TOC.
    """
    # Use the shared get_request helper (consistent with the rest of the
    # module); it already calls raise_for_status() on the response.
    response = get_request(f"{BASE_URL}/toc.json")
    data = response.json()
    return extract_asim_schema_hrefs(data.get("items", []))
61 |
62 |
def extract_table_name_and_fields(url: str) -> Dict[str, List[Dict[str, str]]]:
    """
    Extracts the table name and field schema from a Sentinel ASIM schema page.

    :param url: Full URL of the schema page.
    :return: A dictionary with the table name and a list of field schemas.
    """
    page = get_request(url)
    soup = BeautifulSoup(page.content, "html.parser")

    name = extract_table_name(soup)
    if name is None:
        print(f"No ASIM table found for {url}. Skipping...")
        return None

    return {name: extract_field_data(soup)}
81 |
82 |
def extract_table_name(soup: BeautifulSoup) -> Optional[str]:
    """
    Extracts the table name from the BeautifulSoup object.

    :param soup: BeautifulSoup object of the schema page.
    :return: The extracted table name or None if not found.
    """

    def extract_from_code():
        # Preferred source: the first token of the page's KQL sample.
        code_element = soup.find("code", class_="lang-kql")
        if not code_element:
            return None
        tokens = code_element.text.strip().split()
        if not tokens:
            # Guard against an empty code block (was IndexError on [0]).
            return None
        return extract_table_name_from_string(tokens[0])

    def extract_from_text():
        # Fallback: scan the page text for an im* table token.  The old
        # pattern r"(?i)im(\w+)??" made the capture lazy AND optional, so
        # it matched bare "im" anywhere (e.g. inside "time") with
        # group(1) == None and returned the bogus name "imNone".
        whole_text = soup.get_text()
        match = re.search(r"(?i)\bim(\w+)", whole_text)
        return f"im{match.group(1)}" if match else None

    def extract_table_name_from_string(text):
        match = re.search(r"(?i)(im|_im_)(\w+)", text)
        return f"{match.group(1)}{match.group(2)}" if match else None

    return extract_from_code() or extract_from_text()
108 |
109 |
def extract_field_data(soup: BeautifulSoup) -> Dict[str, Dict[str, str]]:
    """
    Extracts field data from a Sentinel ASIM schema page.

    Scans every HTML table whose header row contains both "Field" and
    "Class" columns and collects one entry per data row.

    :param soup: BeautifulSoup object of the schema page.
    :return: Mapping of field name -> {"class", "data_type", "description"}.
    """
    # schema_details_section = soup.find(id="schema-details")
    field_data = {}

    # Loop through all tables in the section and its subsections
    tables = soup.find_all("table")
    for table in tables:
        # Each table has columns: Field, Class, Type, Description
        headers = [th.text.strip() for th in table.find_all("th")]
        if "Field" in headers and "Class" in headers:
            # Parse each row of the table
            for row in table.find_all("tr")[1:]:  # Skip header row
                cols = [td.text.strip() for td in row.find_all("td")]
                if len(cols) == 4:  # Ensure we have all four columns
                    field_data[cols[0]] = {"class": cols[1], "data_type": cols[2], "description": cols[3]}
    return field_data
132 |
133 |
def get_common_field_data() -> Dict[str, Dict[str, str]]:
    """
    Extracts common field data from a Sentinel ASIM schema page.

    Downloads the "normalization-common-fields" page and parses its field
    tables with extract_field_data.

    :return: Mapping of field name -> {"class", "data_type", "description"}.
    """
    full_url = f"{BASE_URL}/normalization-common-fields"
    response = get_request(full_url)
    soup = BeautifulSoup(response.content, "html.parser")
    common_field_info = extract_field_data(soup)

    return common_field_info
146 |
147 |
def write_schema(output_file: str, schema_tables: Dict[str, dict], common_field_data: Dict[str, dict]):
    """Write the schema tables to a Python file.

    Emits an auto-generated module defining SENTINEL_ASIM_TABLES (table
    name -> field name -> field info) and SENTINEL_ASIM_COMMON_FIELDS
    (shared fields under the single "COMMON" key).

    :param output_file: Path of the module to (over)write.
    :param schema_tables: Mapping of table name to its parsed field schema.
    :param common_field_data: Field info shared by all ASIM tables.
    """

    def field_line(field: str, info: dict) -> str:
        # One generated dict entry.  strip("`") drops markdown code-span
        # backticks; repr() quotes/escapes the description safely.  This
        # helper replaces the formatting string that was duplicated for
        # tables and common fields.
        return (
            f'        "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", '
            f'"description": {repr(info["description"])}, "class": "{info["class"].strip("`")}"}},\n'
        )

    with open(output_file, "w") as f:
        f.write("# This file is auto-generated. Do not edit manually.\n")
        f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n")
        f.write("SENTINEL_ASIM_TABLES = {\n")
        for table, fields in schema_tables.items():
            f.write(f'    "{table}": {{\n')
            for field, info in fields.items():
                f.write(field_line(field, info))
            f.write("    },\n")
        f.write("}\n")
        f.write("SENTINEL_ASIM_COMMON_FIELDS = {\n")
        f.write('    "COMMON": {\n')  # plain literal; was a pointless f-string
        for field, info in common_field_data.items():
            f.write(field_line(field, info))
        f.write("    },\n")
        f.write("}\n")
170 |
171 |
def process_asim_schemas() -> Tuple[Dict[str, dict], Dict[str, dict]]:
    """Processes all ASIM schemas and extracts table names and field schemas."""
    table_hrefs = get_sentinel_asim_schema_tables()
    schema_data = {}
    common_field_data = get_common_field_data()

    for href in table_hrefs:
        full_url = f"{BASE_URL}/{href}"
        print(f"Processing {full_url}...")
        schema_info = extract_table_name_and_fields(full_url)
        if schema_info:
            schema_data.update(schema_info)

    return schema_data, common_field_data
185 |
186 |
if __name__ == "__main__":
    # Scrape every ASIM schema page plus the shared common-field list,
    # then emit the generated tables module.
    schema_data, common_field_data = process_asim_schemas()
    write_schema(OUTPUT_FILE, schema_data, common_field_data)
    print(f"Schema written to {OUTPUT_FILE}")
191 |
--------------------------------------------------------------------------------