├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── errors.go ├── examples ├── parse-rules │ └── main.go ├── simple-rule-mapping │ ├── data.json │ ├── main.go │ ├── mapping_results.json │ └── windows │ │ └── proc_creation_win_wmic_recon_group.yml ├── simple-streamer │ └── main.go └── threaded-streamer │ └── main.go ├── go.mod ├── go.sum ├── ident.go ├── ident_test.go ├── lexer.go ├── lexer_test.go ├── nodes.go ├── parser.go ├── parser_test.go ├── pattern.go ├── rule.go ├── ruleset.go ├── sigma.go ├── token.go ├── tree.go └── tree_test.go /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test go code 2 | 3 | on: 4 | push: 5 | branches: [ master, next-* ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.18 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test Sigma v2 package 25 | run: go test -v ./... 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-sigma-rule-engine 2 | 3 | > Golang library that implements a sigma log rule parser and match engine. 4 | 5 | [Sigma](https://github.com/Neo23x0/sigma) is a open and vendor-agnostic signature format for logs. Official sigma repository includes rule format definition, a public ruleset, and python tooling for converting rules into various SIEM alert formats. 
Essentially, it fills the same role in logging space as Suricata does in packet capture and YARA for file analysis. However, unlike those projects, the open Sigma project does not act as a match engine. Users are still expected to run a supported SIEM or log management solution, with necessary licencing to enable alerting features. 6 | 7 | This project implements a rule parser and real-time match engine in Golang, to provide a lightweight alternative to those SIEM systems. Essentially, it's just a ~3000 line library that can be used by anyone to build their own IDS for logs. Initial version was experimental hack, cobbled together at the last minute with minimal testing, that was used by Crossed Swords 2020 exercise, organized by NATO CCDCOE. Yellow team log post-processor relied on the engine to detect Red team activities in gamenet targets, in real time. This code is archived into `pkg/sigma/v1`. 8 | 9 | Since then, I rewrote the entire engine to provide a cleaner and more tested version as reference to anyone interested in building their own IDS for logs. This code can be found in `pkg/sigma/v2`. The project also includes a cli application in `cmd/` folder, written with [cobra](https://github.com/spf13/cobra). However, it is meant to be used as reference and testing, rather than a fully-fledged tool. 10 | 11 | # Basic usage 12 | 13 | Simply pull the code using `go get`. 14 | 15 | ``` 16 | go get -u github.com/markuskont/go-sigma-rule-engine/ 17 | ``` 18 | 19 | Then import the library into your project. **PS** - this path was refactored to project root in `0.3`, as initial layout was not suitable to library project. Please update your imports accordingly when upgrading from `0.2`. 
20 | 21 | ```go 22 | import ( 23 | "github.com/markuskont/go-sigma-rule-engine" 24 | ) 25 | ``` 26 | 27 | ```go 28 | ruleset, err := sigma.NewRuleset(sigma.Config{ 29 | Directory: viper.GetStringSlice("rules.dir"), 30 | }) 31 | if err != nil { 32 | return err 33 | } 34 | logrus.Debugf("Found %d files, %d ok, %d failed, %d unsupported", 35 | ruleset.Total, ruleset.Ok, ruleset.Failed, ruleset.Unsupported) 36 | ``` 37 | 38 | Events can then be evaluated against full ruleset. 39 | 40 | ```go 41 | if result, match := ruleset.EvalAll(e); match { 42 | // handle match results here here 43 | } 44 | ``` 45 | 46 | Individual rules could also be manually looped. For example, when early return is desired for avoiding full ruleset evaluation. 47 | 48 | ```go 49 | for _, rule := range ruleset.Rules { 50 | if rule.Match(e) { 51 | // handle rule match here 52 | } 53 | } 54 | ``` 55 | 56 | Note that variable `e` should implement `Event` interface. 57 | 58 | ## Matcher and Event 59 | 60 | Our Sigma rule is built as a tree where each node must satisfy the `Matcher` interface that performs boolean evaluation for events. 61 | 62 | ```go 63 | type Matcher interface { 64 | Match(Event) bool 65 | } 66 | ``` 67 | 68 | There are simply too many possible event formats for our simple Sigma library to handle. Therefore, users are expected to implement `Event` interface for any object that will be matched against the ruleset. This `Event` interface embeds field access methods for two Sigma rule types - `keyword` and `selection`. 
69 | 70 | ```go 71 | // Keyworder implements keywords sigma rule type on arbitrary event 72 | // Should return list of fields that are relevant for rule matching 73 | type Keyworder interface { 74 | // Keywords implements Keyworder 75 | Keywords() ([]string, bool) 76 | } 77 | 78 | // Selector implements selection sigma rule type 79 | type Selector interface { 80 | // Select implements Selector 81 | Select(string) (interface{}, bool) 82 | } 83 | 84 | // Event implements sigma rule types by embedding Keyworder and Selector 85 | // Used by rules to extract relevant fields 86 | type Event interface { 87 | Keyworder 88 | Selector 89 | } 90 | ``` 91 | 92 | [Helper function source file](/pkg/sigma/v2/helpers.go) provides an example for handling dynamic hash maps. 93 | 94 | ### Keywords 95 | 96 | `Keywords` rule type is simply a list of patters that must exist in core message. Only one pattern must match. 97 | 98 | ```yaml 99 | keywords: 100 | - 'wget * - http* | perl' 101 | - 'wget * - http* | sh' 102 | - 'wget * - http* | bash' 103 | - 'python -m SimpleHTTPServer' 104 | ``` 105 | 106 | Thus, the `Keyworder` interface simply returns a list of unstructured fields that could be considered core messages. It is built around `slice` because some event types, like Windows EventLog, could contain multiple fields that might contain this information. And returning a `nil` slice is cleaner than empty string when keyword rule type does not apply to event. However, in that case the second return value should always be `false` to ensure early return when rule does not apply to particular message. 107 | 108 | ```go 109 | type Keyworder interface { 110 | Keywords() ([]string, bool) 111 | } 112 | ``` 113 | 114 | Dynamic JSON objects can be implemented as stub because this rule type does not support key-value lookups. 
115 | 116 | ```go 117 | // Keywords implements Keyworder 118 | func (s DynamicMap) Keywords() ([]string, bool) { 119 | return nil, false 120 | } 121 | ``` 122 | 123 | Alternatively, structs for well-known and standardized messages, such as BSD syslog, might simply return the `Message` field. 124 | 125 | ```go 126 | func (m Syslog) Keywords() ([]string, bool) { 127 | return m.Message.Keywords() 128 | } 129 | ``` 130 | 131 | That message could be a unstructured string that also implements our `Event` interface. 132 | 133 | ```go 134 | type Message string 135 | 136 | func (m Message) Keywords() ([]string, bool) { 137 | return []string{string(m)}, true 138 | } 139 | ``` 140 | 141 | Dynamic structured events, like Suricata EVE, could have well known fields that might qualify as message. 142 | 143 | ```go 144 | func (s DynamicMap) Keywords() ([]string, bool) { 145 | if val, ok := s.Select("alert.signature"); ok { 146 | if str, ok := val.(string); ok { 147 | return []string{str}, true 148 | } 149 | } 150 | return nil, false 151 | } 152 | ``` 153 | 154 | Multiple fields could be extracted and passed to the rule with this method. For example, `payload_printable`, `alert.category`, etc. 155 | 156 | ### Selection 157 | 158 | This rule type is for key-value lookups. 159 | 160 | ```yaml 161 | selection: 162 | winlog.event_data.ScriptBlockText: 163 | - ' -FromBase64String' 164 | ``` 165 | 166 | Like with `keyword`, this rule type might simply may not apply to some events. 167 | 168 | ```go 169 | func (s UnstructuredEvent) Select(key string) (interface{}, bool) { 170 | return nil, false 171 | } 172 | ``` 173 | 174 | Otherwise, dynamic maps might simply implement it as wrapper for key-value lookup. 
175 | 176 | ```go 177 | func (s DynamicMap) Select(key string) (interface{}, bool) { 178 | if val, ok := d[key]; ok { 179 | return val, true 180 | } 181 | return nil, false 182 | } 183 | ``` 184 | 185 | Static structs for well-standardized event formats may simply handle these lookups manually. 186 | 187 | ```go 188 | type Syslog struct { 189 | Timestamp time.Time `json:"@timestamp"` 190 | Host string `json:"host"` 191 | Program string `json:"program"` 192 | Pid int `json:"pid"` 193 | Severity int `json:"severity"` 194 | Facility int `json:"facility"` 195 | Sender net.IP `json:"ip"` 196 | 197 | Message `json:"message"` 198 | } 199 | 200 | func (m Syslog) Select(key string) (interface{}, bool) { 201 | switch key { 202 | case "timestamp", "@timestamp": 203 | return m.Timestamp, true 204 | case "host": 205 | return m.Host, true 206 | case "program": 207 | return m.Program, true 208 | case "pid": 209 | return m.Pid, true 210 | case "severity": 211 | return m.Severity, true 212 | case "facility": 213 | return m.Facility, true 214 | case "sender": 215 | if m.Sender == nil { 216 | return nil, false 217 | } 218 | return m.Sender.String(), true 219 | case "message", "msg": 220 | return m.Keywords(), true 221 | default: 222 | return nil, false 223 | } 224 | } 225 | ``` 226 | 227 | # Performance 228 | 229 | ```go 230 | BenchmarkTreePositive0-12 867567 1363 ns/op 231 | BenchmarkTreePositive1-12 862962 1494 ns/op 232 | BenchmarkTreePositive2-12 795531 1380 ns/op 233 | BenchmarkTreePositive3-12 854679 1393 ns/op 234 | BenchmarkTreePositive4-12 884188 1364 ns/op 235 | BenchmarkTreePositive5-12 809140 1390 ns/op 236 | BenchmarkTreePositive6-12 773706 1410 ns/op 237 | BenchmarkTreeNegative0-12 776173 1385 ns/op 238 | BenchmarkTreeNegative1-12 812887 1481 ns/op 239 | BenchmarkTreeNegative2-12 850477 1401 ns/op 240 | BenchmarkTreeNegative3-12 840723 1390 ns/op 241 | BenchmarkTreeNegative4-12 819126 1417 ns/op 242 | BenchmarkTreeNegative5-12 748514 1416 ns/op 243 | 
BenchmarkTreeNegative6-12 856683 1382 ns/op 244 | ``` 245 | 246 | # Limitations 247 | 248 | **Ruleset is not thread safe**. Nor can it be easily deep-copied due to possible pointers behind interfaces and pattern containers. Each worker thread should instantiate independent ruleset. However, public sigma ruleset only produces about ~500 rules, so overhead is currently trivial. 249 | 250 | **Library is built around distinct rules, rather than entire ruleset**. That means that each rule could run separate map lookups and no data is shared between them. While individual rules are quite efficient, even in current unoptimized form, passing each event thought entire ruleset means traversing hundreds of rules. Thus having significant performance overhead. For example, we measured that passing an ECS formatted Windows EventLog message through all Windows rules in public Sigma ruleset took 4.5 times the amount of time that was otherwise spent on simply decoding the message. 251 | 252 | **Ruleset splitting and pre-filtering must be handled by the user.** Sigma has `logsource` field to indicate which events should be evaluated against a rule. We simply handled this externally, parsing rules into a map of smaller rulesets. So, we had separate rulesets for Syslog, Snoopy, Suricata and EventLog. Logsource field was used to determine which ruleset was executed for event. 253 | 254 | **No support for aggregations or event correlation.** Relatively small amount of Sigma rules use aggregations with `count() > N` or `Near()` keywords. Implementing them in streaming scenario is quite complex, as they require sharing state between messages over sliding window. Thus requiring full event correlation to be implemented. However, this did not fit our present concurrency model where N workers load balance over common message channel and no information is shared between them. Future work. 
255 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "reflect" 7 | ) 8 | 9 | // ErrInvalidRegex contextualizes broken regular expressions presented by the user 10 | type ErrInvalidRegex struct { 11 | Pattern string 12 | Err error 13 | } 14 | 15 | // Error implements error 16 | func (e ErrInvalidRegex) Error() string { 17 | return fmt.Sprintf("/%s/ %s", e.Pattern, e.Err) 18 | } 19 | 20 | // ErrMissingDetection indicates missing detection field 21 | type ErrMissingDetection struct{} 22 | 23 | func (e ErrMissingDetection) Error() string { return "sigma rule is missing detection field" } 24 | 25 | // ErrMissingConditionItem indicates that identifier in condition is missing in detection map 26 | type ErrMissingConditionItem struct { 27 | Key string 28 | } 29 | 30 | func (e ErrMissingConditionItem) Error() string { 31 | return fmt.Sprintf("missing condition identifier %s", e.Key) 32 | } 33 | 34 | // ErrEmptyDetection indicates detection field present but empty 35 | type ErrEmptyDetection struct{} 36 | 37 | func (e ErrEmptyDetection) Error() string { return "sigma rule has detection but is empty" } 38 | 39 | // ErrMissingCondition indicates missing condition field 40 | type ErrMissingCondition struct{} 41 | 42 | func (e ErrMissingCondition) Error() string { return "complex sigma rule is missing condition" } 43 | 44 | // ErrIncompleteDetection indicates a rule has defined identifiers that are missing in detection map 45 | type ErrIncompleteDetection struct { 46 | Condition string 47 | Keys []string 48 | Msg string 49 | } 50 | 51 | func (e ErrIncompleteDetection) Error() string { 52 | return fmt.Sprintf( 53 | "incomplete rule, missing fields from condition. [%s]. Has %+v. 
%s", 54 | e.Condition, 55 | func() []string { 56 | if e.Keys != nil { 57 | return e.Keys 58 | } 59 | return []string{} 60 | }(), 61 | e.Msg, 62 | ) 63 | } 64 | 65 | // ErrUnsupportedToken is a parser error indicating lexical token that is not yet supported 66 | // Meant to be used as informational warning, rather than application breaking error 67 | type ErrUnsupportedToken struct{ Msg string } 68 | 69 | func (e ErrUnsupportedToken) Error() string { return fmt.Sprintf("UNSUPPORTED TOKEN: %s", e.Msg) } 70 | 71 | // ErrWip indicates a rule expression that is currently Work In Progress 72 | // Functions like ErrUnsupportedToken but indicates that feature is under active development 73 | // Non-critical escape hatch while debugging 74 | type ErrWip struct{} 75 | 76 | func (e ErrWip) Error() string { return "work in progress" } 77 | 78 | // ErrParseYaml indicates YAML parsing error 79 | type ErrParseYaml struct { 80 | Path string 81 | Err error 82 | Count int 83 | } 84 | 85 | func (e ErrParseYaml) Error() string { 86 | return fmt.Sprintf("%d - File: %s; Err: %s", e.Count, e.Path, e.Err) 87 | } 88 | 89 | // ErrGotBrokenYamlFiles is a bulk error handler for dealing with broken sigma rules 90 | // Some rules are bound to fail, no reason to exit entire application 91 | // Individual errors can be collected and returned at the end 92 | // Called decides if they should be only reported or it warrants full exit 93 | type ErrBulkParseYaml struct { 94 | Errs []ErrParseYaml 95 | } 96 | 97 | func (e ErrBulkParseYaml) Error() string { 98 | return fmt.Sprintf("got %d broken yaml files", len(e.Errs)) 99 | } 100 | 101 | // ErrInvalidTokenSeq indicates expression syntax error from rule writer 102 | // For example, two indents should be separated by a logical AND / OR operator 103 | type ErrInvalidTokenSeq struct { 104 | Prev, Next Item 105 | Collected []Item 106 | } 107 | 108 | func (e ErrInvalidTokenSeq) Error() string { 109 | return fmt.Sprintf(`seq error after collecting %d 
elements.`+ 110 | ` Invalid token sequence %s -> %s. Values: %s -> %s.`, 111 | len(e.Collected), e.Prev.T, e.Next.T, e.Prev.Val, e.Next.Val) 112 | } 113 | 114 | // ErrIncompleteTokenSeq is invoked when lex channel drain does not end with EOF 115 | // thus indicating incomplete lexing sequence 116 | type ErrIncompleteTokenSeq struct { 117 | Expression string 118 | Items []Item 119 | Last Item 120 | } 121 | 122 | func (e ErrIncompleteTokenSeq) Error() string { 123 | return fmt.Sprintf("last element should be EOF, got token %s with value %s", 124 | e.Last.T.String(), e.Last.Val) 125 | } 126 | 127 | // ErrInvalidKeywordConstruct indicates that parser found a keyword expression 128 | // that did not match any known keyword rule structure 129 | // could be unmarshal issue 130 | type ErrInvalidKeywordConstruct struct { 131 | Msg string 132 | Expr interface{} 133 | } 134 | 135 | func (e ErrInvalidKeywordConstruct) Error() string { 136 | return fmt.Sprintf(`invalid type for parsing keyword expression. `+ 137 | `Should be slice of strings or a funky one element map where value is slice of strings. `+ 138 | `Or other stuff. Got |%+v| with type |%s|`, 139 | e.Expr, reflect.TypeOf(e.Expr).String()) 140 | } 141 | 142 | // ErrInvalidSelectionConstruct indicates that parser found a selection expression 143 | // that did not match any known selection rule structure 144 | // could be unmarshal issue 145 | type ErrInvalidSelectionConstruct struct { 146 | Msg string 147 | Expr interface{} 148 | } 149 | 150 | func (e ErrInvalidSelectionConstruct) Error() string { 151 | return fmt.Sprintf("invalid type for parsing selection expression. 
Got |%+v| with type |%s|", 152 | e.Expr, reflect.TypeOf(e.Expr).String()) 153 | } 154 | 155 | // ErrInvalidKind indicates that type switching function received an unsupported 156 | // or unhandled data type 157 | // Contains the type in question, arbitrary error text and keyword/selection indicator 158 | // Critical is used to indicate if this error should cause an exit or can simply 159 | // be handled as a warning for future improvements 160 | type ErrInvalidKind struct { 161 | reflect.Kind 162 | Msg string 163 | T identType 164 | Critical bool 165 | } 166 | 167 | func (e ErrInvalidKind) Error() string { 168 | return fmt.Sprintf("%s data type error. %s got %s. %s", 169 | func() string { 170 | if e.Critical { 171 | return "CRITICAL" 172 | } 173 | return "Informative" 174 | }(), e.T, e.Kind, e.Msg) 175 | } 176 | 177 | // ErrUnsupportedExpression indicates that rule expression is not yet supported by parser 178 | // mostly a type issue 179 | type ErrUnsupportedExpression struct { 180 | Msg string 181 | T identType 182 | Expr interface{} 183 | Critical bool 184 | } 185 | 186 | func (e ErrUnsupportedExpression) Error() string { 187 | return fmt.Sprintf("%s unsupported expression for %s, %s. %+v", 188 | func() string { 189 | if e.Critical { 190 | return "CRITICAL" 191 | } 192 | return "Informative" 193 | }(), e.T, e.Msg, e.Expr) 194 | } 195 | 196 | // ErrUnableToReflect indicates that kind reflection could not be done, as 197 | // typeOf returned a nil value 198 | // likely a missing pattern 199 | var ErrUnableToReflect = errors.New("unable to reflect on pattern kind") 200 | -------------------------------------------------------------------------------- /examples/parse-rules/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 Markus Kont alias013@gmail.com 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package main 17 | 18 | import ( 19 | "flag" 20 | "log" 21 | "strings" 22 | 23 | "github.com/markuskont/go-sigma-rule-engine" 24 | ) 25 | 26 | type counts struct { 27 | ok, fail, unsupported int 28 | } 29 | 30 | var ( 31 | flagRuleDir = flag.String("rules-dir", "", "Directories containing rules. Multiple can be defined with semicolon as separator.") 32 | ) 33 | 34 | func main() { 35 | flag.Parse() 36 | files, err := sigma.NewRuleFileList(strings.Split(*flagRuleDir, ";")) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | for _, f := range files { 41 | log.Println(f) 42 | } 43 | log.Println("Parsing rule yaml files") 44 | rules, err := sigma.NewRuleList(files, true, false, nil) 45 | if err != nil { 46 | switch err.(type) { 47 | case sigma.ErrBulkParseYaml: 48 | log.Println(err) 49 | default: 50 | log.Fatal(err) 51 | } 52 | } 53 | log.Printf("Got %d rules from yaml\n", len(rules)) 54 | log.Println("Parsing rules into AST") 55 | c := &counts{} 56 | loop: 57 | for _, raw := range rules { 58 | log.Print(raw.Path) 59 | if raw.Multipart { 60 | c.unsupported++ 61 | continue loop 62 | } 63 | _, err := sigma.NewTree(raw) 64 | if err != nil { 65 | switch err.(type) { 66 | case sigma.ErrUnsupportedToken: 67 | c.unsupported++ 68 | log.Printf("%s: %s\n", err, raw.Path) 69 | default: 70 | c.fail++ 71 | log.Printf("%s\n", err) 72 | } 73 | } else { 74 | log.Printf("%s: ok\n", raw.Path) 75 | c.ok++ 76 | } 77 | } 78 | log.Printf("OK: %d; FAIL: %d; UNSUPPORTED: %d\n", c.ok, c.fail, c.unsupported) 79 | } 80 | 
-------------------------------------------------------------------------------- /examples/simple-rule-mapping/data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "CommandLine":"powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 4 | "Company":"Microsoft Corporation", 5 | "CurrentDirectory":"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 6 | "Description":"Windows PowerShell", 7 | "EventRecordID":"35114", 8 | "FileVersion":"10.0.19041.546 (WinBuild.160101.0800)", 9 | "Hashes":"SHA256=9F914D42706FE215501044ACD85A32D58AAEF1419D404FDDFA5D3B48F66CCD9F", 10 | "Image":"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 11 | "IntegrityLevel":"High", 12 | "LogonGuid":"{fbe589a2-781c-63f8-ec89-070000000000}", 13 | "LogonId":"0x00000000000789ec", 14 | "OriginalFileName":"PowerShell.EXE", 15 | "ParentCommandLine":"\"C:\\Users\\Public\\splunkd.exe\" -server http://192.168.1.5:8888 -group red", 16 | "ParentImage":"C:\\Users\\Public\\splunkd.exe", 17 | "ParentProcessGuid":"{fbe589a2-ff98-63fe-150e-000000001200}", 18 | "ParentProcessId":"5484", 19 | "ParentUser":"DESKTOP-IDQQB81\\victim1", 20 | "ProcessGuid":"{fbe589a2-4f85-6401-0c1a-000000001200}", 21 | "ProcessId":"1264","Product":"Microsoft® Windows® Operating System", 22 | "RuleName":"-", 23 | "TerminalSessionId":"1", 24 | "User":"DESKTOP-IDQQB81\\victim1", 25 | "UtcTime":"2023-03-03 01:38:13.179" 26 | }, 27 | { 28 | "CommandLine":"\"C:\\Windows\\System32\\Wbem\\WMIC.exe\" group get name", 29 | "Company":"Microsoft Corporation", 30 | "CurrentDirectory":"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 31 | "Description":"WMI Commandline Utility", 32 | "EventRecordID":"35115", 33 | "FileVersion":"10.0.19041.1741 (WinBuild.160101.0800)", 34 | "Hashes":"SHA256=12ABB45620A7A1FFD8BB953DEBA3FCC30B8BA14B2FF523F1F519BF2BF6BA7D4C", 35 | 
"Image":"C:\\Windows\\System32\\wbem\\WMIC.exe", 36 | "IntegrityLevel":"High", 37 | "LogonGuid":"{fbe589a2-781c-63f8-ec89-070000000000}", 38 | "LogonId":"0x00000000000789ec", 39 | "OriginalFileName":"wmic.exe", 40 | "ParentCommandLine":"powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 41 | "ParentImage":"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 42 | "ParentProcessGuid":"{fbe589a2-4f85-6401-0c1a-000000001200}", 43 | "ParentProcessId":"1264","ParentUser":"DESKTOP-IDQQB81\\victim1", 44 | "ProcessGuid":"{fbe589a2-4f85-6401-0d1a-000000001200}", 45 | "ProcessId":"5572", 46 | "Product":"Microsoft® Windows® Operating System", 47 | "RuleName":"-", 48 | "TerminalSessionId":"1", 49 | "User":"DESKTOP-IDQQB81\\victim1", 50 | "UtcTime":"2023-03-03 01:38:13.584" 51 | } 52 | ] -------------------------------------------------------------------------------- /examples/simple-rule-mapping/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "io/ioutil" 7 | "log" 8 | "os" 9 | "strings" 10 | 11 | "github.com/markuskont/datamodels" 12 | "github.com/markuskont/go-sigma-rule-engine" 13 | ) 14 | 15 | var ( 16 | flagRuleSetPath = flag.String("path-ruleset", "./windows/", "Root folders for Sigma rules. Semicolon delimits paths.") 17 | ) 18 | 19 | func saveJSONToFile(filename string, data []interface{}) error { 20 | var jsonData []byte 21 | for _, d := range data { 22 | dJSON, err := json.Marshal(d) 23 | if err != nil { 24 | return err 25 | } 26 | jsonData = append(jsonData, dJSON...) 
27 | jsonData = append(jsonData, '\n') 28 | } 29 | return ioutil.WriteFile(filename, jsonData, 0644) 30 | } 31 | func main() { 32 | 33 | log.Println("start job") 34 | flag.Parse() 35 | 36 | if *flagRuleSetPath == "" { 37 | log.Fatal("ruleset path not configured") 38 | } 39 | 40 | ruleset, err := sigma.NewRuleset(sigma.Config{ 41 | Directory: strings.Split(*flagRuleSetPath, ";"), 42 | NoCollapseWS: false, 43 | FailOnRuleParse: false, 44 | FailOnYamlParse: false, 45 | }, nil) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | 50 | data, err := ioutil.ReadFile("./data.json") 51 | 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | 56 | var events []map[string]interface{} 57 | 58 | if err := json.Unmarshal([]byte(data), &events); err != nil { 59 | panic(err) 60 | } 61 | cnt := 0 62 | hit := 0 63 | sigmaResults := []interface{}{} 64 | for _, event := range events { 65 | 66 | jsonStr, err := json.Marshal(event) 67 | 68 | if err != nil { 69 | log.Println(err) 70 | } 71 | 72 | var obj datamodels.Map 73 | if err := json.Unmarshal(jsonStr, &obj); err != nil { 74 | log.Println(err) 75 | } 76 | 77 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 78 | obj["sigma_results"] = results 79 | if err != nil { 80 | log.Println(err) 81 | } 82 | sigmaResults = append(sigmaResults, obj) 83 | 84 | hit += 1 85 | } 86 | cnt += 1 87 | 88 | } 89 | log.Println("total dataset : ", cnt) 90 | log.Println("total hit rule : ", hit) 91 | file, err := os.Create("./mapping_results.json") 92 | encoder := json.NewEncoder(file) 93 | encoder.SetIndent("", " ") 94 | encoder.Encode(sigmaResults) 95 | } 96 | -------------------------------------------------------------------------------- /examples/simple-rule-mapping/mapping_results.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "CommandLine": "\"C:\\Windows\\System32\\Wbem\\WMIC.exe\" group get name", 4 | "Company": "Microsoft Corporation", 5 | "CurrentDirectory": 
"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 6 | "Description": "WMI Commandline Utility", 7 | "EventRecordID": "35115", 8 | "FileVersion": "10.0.19041.1741 (WinBuild.160101.0800)", 9 | "Hashes": "SHA256=12ABB45620A7A1FFD8BB953DEBA3FCC30B8BA14B2FF523F1F519BF2BF6BA7D4C", 10 | "Image": "C:\\Windows\\System32\\wbem\\WMIC.exe", 11 | "IntegrityLevel": "High", 12 | "LogonGuid": "{fbe589a2-781c-63f8-ec89-070000000000}", 13 | "LogonId": "0x00000000000789ec", 14 | "OriginalFileName": "wmic.exe", 15 | "ParentCommandLine": "powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 16 | "ParentImage": "C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 17 | "ParentProcessGuid": "{fbe589a2-4f85-6401-0c1a-000000001200}", 18 | "ParentProcessId": "1264", 19 | "ParentUser": "DESKTOP-IDQQB81\\victim1", 20 | "ProcessGuid": "{fbe589a2-4f85-6401-0d1a-000000001200}", 21 | "ProcessId": "5572", 22 | "Product": "Microsoft® Windows® Operating System", 23 | "RuleName": "-", 24 | "TerminalSessionId": "1", 25 | "User": "DESKTOP-IDQQB81\\victim1", 26 | "UtcTime": "2023-03-03 01:38:13.584", 27 | "sigma_results": [ 28 | { 29 | "tags": [ 30 | "attack.discovery", 31 | "attack.t1069.001" 32 | ], 33 | "id": "164eda96-11b2-430b-85ff-6a265c15bf32", 34 | "title": "Local Groups Reconnaissance Via Wmic.EXE", 35 | "description": "Detects the execution of \"wmic\" with the \"group\" flag.\nAdversaries may attempt to find local system groups and permission settings.\nThe knowledge of local system permission groups can help adversaries determine which groups exist and which users belong to a particular group.\nAdversaries may use this information to determine which users have elevated permissions, such as the users found within the local administrators group.\n" 36 | } 37 | ] 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- 
/examples/simple-rule-mapping/windows/proc_creation_win_wmic_recon_group.yml: -------------------------------------------------------------------------------- 1 | title: Local Groups Reconnaissance Via Wmic.EXE 2 | id: 164eda96-11b2-430b-85ff-6a265c15bf32 3 | status: experimental 4 | description: | 5 | Detects the execution of "wmic" with the "group" flag. 6 | Adversaries may attempt to find local system groups and permission settings. 7 | The knowledge of local system permission groups can help adversaries determine which groups exist and which users belong to a particular group. 8 | Adversaries may use this information to determine which users have elevated permissions, such as the users found within the local administrators group. 9 | references: 10 | - https://github.com/redcanaryco/atomic-red-team/blob/f339e7da7d05f6057fdfcdd3742bfcf365fee2a9/atomics/T1069.001/T1069.001.md 11 | author: frack113 12 | date: 2021/12/12 13 | modified: 2023/02/14 14 | tags: 15 | - attack.discovery 16 | - attack.t1069.001 17 | logsource: 18 | product: windows 19 | category: process_creation 20 | detection: 21 | selection_img: 22 | - Image|endswith: '\wmic.exe' 23 | - OriginalFileName: 'wmic.exe' 24 | selection_cli: 25 | CommandLine|contains: ' group' 26 | condition: all of selection* 27 | falsepositives: 28 | - Unknown 29 | level: low 30 | -------------------------------------------------------------------------------- /examples/simple-streamer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "flag" 7 | "log" 8 | "os" 9 | "strings" 10 | 11 | "github.com/markuskont/datamodels" 12 | "github.com/markuskont/go-sigma-rule-engine" 13 | ) 14 | 15 | var ( 16 | flagRuleSetPath = flag.String("path-ruleset", "", "Root folders for Sigma rules. 
Semicolon delimits paths.") 17 | ) 18 | 19 | func main() { 20 | flag.Parse() 21 | if *flagRuleSetPath == "" { 22 | log.Fatal("ruleset path not configured") 23 | } 24 | ruleset, err := sigma.NewRuleset(sigma.Config{ 25 | Directory: strings.Split(*flagRuleSetPath, ";"), 26 | NoCollapseWS: false, 27 | FailOnRuleParse: false, 28 | FailOnYamlParse: false, 29 | }, nil) 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | scanner := bufio.NewScanner(bufio.NewReader(os.Stdin)) 34 | output := os.Stdout 35 | loop: 36 | for scanner.Scan() { 37 | var obj datamodels.Map 38 | if err := json.Unmarshal(scanner.Bytes(), &obj); err != nil { 39 | log.Println(err) 40 | continue loop 41 | } 42 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 43 | obj["sigma_results"] = results 44 | encoded, err := json.Marshal(obj) 45 | if err != nil { 46 | log.Println(err) 47 | continue loop 48 | } 49 | output.Write(append(encoded, []byte("\n")...)) 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/threaded-streamer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "flag" 7 | "log" 8 | "os" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/markuskont/datamodels" 13 | "github.com/markuskont/go-sigma-rule-engine" 14 | ) 15 | 16 | var ( 17 | flagRuleSetPath = flag.String("path-ruleset", "", "Root folders for Sigma rules. 
Semicolon delimits paths.") 18 | flagWorkers = flag.Int("workers", 4, "Number of async workers") 19 | ) 20 | 21 | func main() { 22 | flag.Parse() 23 | if *flagRuleSetPath == "" { 24 | log.Fatal("ruleset path not configured") 25 | } 26 | if *flagWorkers <= 0 { 27 | log.Fatal("invalid worker count") 28 | } 29 | 30 | // ruleset setup 31 | ruleset, err := sigma.NewRuleset(sigma.Config{ 32 | Directory: strings.Split(*flagRuleSetPath, ";"), 33 | NoCollapseWS: false, 34 | FailOnRuleParse: false, 35 | FailOnYamlParse: false, 36 | }, nil) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | // syncing setup 42 | var wg sync.WaitGroup 43 | defer wg.Wait() 44 | ch := make(chan []byte, *flagWorkers) 45 | 46 | // workers setup 47 | for i := 0; i < *flagWorkers; i++ { 48 | wg.Add(1) 49 | go func() { 50 | defer wg.Done() 51 | output := os.Stdout 52 | loop: 53 | for data := range ch { 54 | var obj datamodels.Map 55 | if err := json.Unmarshal(data, &obj); err != nil { 56 | log.Println(err) 57 | continue loop 58 | } 59 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 60 | obj["sigma_results"] = results 61 | encoded, err := json.Marshal(obj) 62 | if err != nil { 63 | log.Println(err) 64 | continue loop 65 | } 66 | output.Write(append(encoded, []byte("\n")...)) 67 | } 68 | } 69 | }() 70 | } 71 | 72 | // scanner setup 73 | wg.Add(1) 74 | go func() { 75 | defer wg.Done() 76 | defer close(ch) 77 | scanner := bufio.NewScanner(bufio.NewReader(os.Stdin)) 78 | for scanner.Scan() { 79 | // need to copy the bytes as scanner.Bytes is modified in place 80 | cpy := make([]byte, len(scanner.Bytes())) 81 | copy(cpy, scanner.Bytes()) 82 | ch <- cpy 83 | } 84 | }() 85 | } 86 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/markuskont/go-sigma-rule-engine 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/gobwas/glob v0.2.3 7 | 
github.com/markuskont/datamodels v0.0.1 8 | gopkg.in/yaml.v2 v2.4.0 9 | ) 10 | 11 | require ( 12 | github.com/kr/text v0.2.0 // indirect 13 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 14 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 3 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 4 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 5 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 6 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 7 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 8 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 9 | github.com/markuskont/datamodels v0.0.1 h1:Pibmdtfp4hTypvmFmmCPIkSPxUZ6rpi/myd8U9F/5y4= 10 | github.com/markuskont/datamodels v0.0.1/go.mod h1:dyie+4X2Pmask9qB6PS89+Xq6v0Hjm+anprlucH1JcA= 11 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 12 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 14 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 16 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 17 | gopkg.in/check.v1 
v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 18 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 19 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 20 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 21 | -------------------------------------------------------------------------------- /ident.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "reflect" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | type identType int 12 | 13 | func (i identType) String() string { 14 | switch i { 15 | case identKeyword: 16 | return "KEYWORD" 17 | case identSelection: 18 | return "SELECTION" 19 | default: 20 | return "UNK" 21 | } 22 | } 23 | 24 | const ( 25 | identErr identType = iota 26 | identSelection 27 | identKeyword 28 | ) 29 | 30 | func checkIdentType(name string, data interface{}) identType { 31 | t := reflectIdentKind(data) 32 | if strings.HasPrefix(name, "keyword") { 33 | if data == nil { 34 | return identKeyword 35 | } 36 | if t != identKeyword { 37 | return identErr 38 | } 39 | } 40 | return t 41 | } 42 | 43 | func reflectIdentKind(data interface{}) identType { 44 | switch v := data.(type) { 45 | case map[string]interface{}, map[interface{}]interface{}: 46 | return identSelection 47 | case []interface{}: 48 | k, ok := isSameKind(v) 49 | if !ok { 50 | return identErr 51 | } 52 | switch k { 53 | case reflect.Map: 54 | return identSelection 55 | default: 56 | return identKeyword 57 | } 58 | default: 59 | return identKeyword 60 | } 61 | } 62 | 63 | func newRuleFromIdent(rule interface{}, kind identType, noCollapseWS bool) (Branch, error) { 64 | switch kind { 65 | case identKeyword: 66 | return NewKeyword(rule, noCollapseWS) 67 | case identSelection: 68 | return NewSelectionBranch(rule, noCollapseWS) 69 | } 70 | return nil, 
fmt.Errorf("unknown rule kind, should be keyword or selection") 71 | } 72 | 73 | // Keyword is a container for patterns joined by logical disjunction 74 | type Keyword struct { 75 | S StringMatcher 76 | stats 77 | } 78 | 79 | // Match implements Matcher 80 | func (k Keyword) Match(msg Event) (bool, bool) { 81 | msgs, ok := msg.Keywords() 82 | if !ok { 83 | return false, false 84 | } 85 | for _, m := range msgs { 86 | if k.S.StringMatch(m) { 87 | return true, true 88 | } 89 | } 90 | return false, true 91 | } 92 | 93 | func NewKeyword(expr interface{}, noCollapseWS bool) (*Keyword, error) { 94 | switch val := expr.(type) { 95 | case []string: 96 | return newStringKeyword(TextPatternKeyword, false, noCollapseWS, val...) 97 | case []interface{}: 98 | k, ok := isSameKind(val) 99 | if !ok { 100 | return nil, ErrInvalidKind{ 101 | Kind: reflect.Array, 102 | T: identKeyword, 103 | Critical: false, 104 | Msg: "mixed type slice", 105 | } 106 | } 107 | switch v := k; { 108 | case v == reflect.String: 109 | return newStringKeyword(TextPatternKeyword, false, noCollapseWS, castIfaceToString(val)...) 110 | default: 111 | return nil, ErrInvalidKind{ 112 | Kind: v, 113 | T: identKeyword, 114 | Critical: false, 115 | Msg: "unsupported data type", 116 | } 117 | } 118 | 119 | default: 120 | // TODO 121 | return nil, ErrInvalidKeywordConstruct{Expr: expr} 122 | } 123 | } 124 | 125 | func newStringKeyword(mod TextPatternModifier, lower, noCollapseWS bool, patterns ...string) (*Keyword, error) { 126 | matcher, err := NewStringMatcher(mod, lower, false, noCollapseWS, patterns...) 
127 | if err != nil { 128 | return nil, err 129 | } 130 | return &Keyword{S: matcher}, nil 131 | } 132 | 133 | type SelectionNumItem struct { 134 | Key string 135 | Pattern NumMatcher 136 | } 137 | 138 | type SelectionStringItem struct { 139 | Key string 140 | Pattern StringMatcher 141 | } 142 | 143 | type Selection struct { 144 | N []SelectionNumItem 145 | S []SelectionStringItem 146 | stats 147 | } 148 | 149 | // Match implements Matcher 150 | // TODO - numeric and boolean pattern match 151 | func (s Selection) Match(msg Event) (bool, bool) { 152 | for _, v := range s.N { 153 | val, ok := msg.Select(v.Key) 154 | if !ok { 155 | return false, false 156 | } 157 | switch vt := val.(type) { 158 | case string: 159 | n, err := strconv.Atoi(vt) 160 | if err != nil { 161 | // TODO - better debugging 162 | return false, true 163 | } 164 | if !v.Pattern.NumMatch(n) { 165 | return false, true 166 | } 167 | case json.Number: 168 | n, err := vt.Int64() 169 | if err != nil { 170 | // TODO - better debugging 171 | return false, true 172 | } 173 | if !v.Pattern.NumMatch(int(n)) { 174 | return false, true 175 | } 176 | case float64: 177 | // JSON numbers are all by spec float64 values 178 | if !v.Pattern.NumMatch(int(vt)) { 179 | return false, true 180 | } 181 | case int: 182 | // JSON numbers are all by spec float64 values 183 | if !v.Pattern.NumMatch(vt) { 184 | return false, true 185 | } 186 | case int64: 187 | // JSON numbers are all by spec float64 values 188 | if !v.Pattern.NumMatch(int(vt)) { 189 | return false, true 190 | } 191 | case int32: 192 | // JSON numbers are all by spec float64 values 193 | if !v.Pattern.NumMatch(int(vt)) { 194 | return false, true 195 | } 196 | case uint: 197 | // JSON numbers are all by spec float64 values 198 | if !v.Pattern.NumMatch(int(vt)) { 199 | return false, true 200 | } 201 | case uint32: 202 | // JSON numbers are all by spec float64 values 203 | if !v.Pattern.NumMatch(int(vt)) { 204 | return false, true 205 | } 206 | case uint64: 207 | 
// JSON numbers are all by spec float64 values 208 | if !v.Pattern.NumMatch(int(vt)) { 209 | return false, true 210 | } 211 | } 212 | } 213 | for _, v := range s.S { 214 | val, ok := msg.Select(v.Key) 215 | if !ok { 216 | return false, false 217 | } 218 | switch vt := val.(type) { 219 | case string: 220 | if !v.Pattern.StringMatch(vt) { 221 | return false, true 222 | } 223 | case json.Number: 224 | if !v.Pattern.StringMatch(vt.String()) { 225 | return false, true 226 | } 227 | case float64: 228 | // TODO - tmp hack that also loses floating point accuracy 229 | if !v.Pattern.StringMatch(strconv.Itoa(int(vt))) { 230 | return false, true 231 | } 232 | default: 233 | s.incrementMismatchCount() 234 | return false, true 235 | } 236 | } 237 | return true, true 238 | } 239 | 240 | func (s *Selection) incrementMismatchCount() *Selection { 241 | s.stats.TypeMismatchCount++ 242 | return s 243 | } 244 | 245 | func newSelectionFromMap(expr map[string]interface{}, noCollapseWS bool) (*Selection, error) { 246 | sel := &Selection{S: make([]SelectionStringItem, 0)} 247 | for key, pattern := range expr { 248 | var mod TextPatternModifier 249 | var all bool 250 | if strings.Contains(key, "|") { 251 | bits := strings.Split(key, "|") 252 | // allow support for longer chaining later on; simplifies specifier validation as well (I think) 253 | for _, curBit := range bits[1:] { 254 | // excepting 'all', the supported modifiers are mutually exclusive; last one wins 255 | switch curBit { 256 | case "startswith": 257 | mod = TextPatternPrefix 258 | case "endswith": 259 | mod = TextPatternSuffix 260 | case "re": 261 | mod = TextPatternRegex // this is really a type, not a transformation per spec 262 | case "contains": 263 | mod = TextPatternContains 264 | case "all": 265 | all = true 266 | default: 267 | return nil, fmt.Errorf("selection key %s specifier %s invalid", 268 | key, curBit) 269 | } 270 | } 271 | // strip off the specifier from the key so we can look it up correctly 272 | key = 
bits[0] 273 | } 274 | switch pat := pattern.(type) { 275 | case string: 276 | m, err := NewStringMatcher(mod, false, all, noCollapseWS, pat) 277 | if err != nil { 278 | return nil, err 279 | } 280 | sel.S = append(sel.S, SelectionStringItem{Key: key, Pattern: m}) 281 | case int: 282 | m, err := NewNumMatcher(pat) 283 | if err != nil { 284 | return nil, err 285 | } 286 | sel.N = func() []SelectionNumItem { 287 | item := SelectionNumItem{ 288 | Key: key, Pattern: m, 289 | } 290 | if sel.N == nil { 291 | sel.N = []SelectionNumItem{item} 292 | } 293 | return append(sel.N, item) 294 | }() 295 | case []interface{}: 296 | // TODO - move this part to separate function and reuse in NewKeyword 297 | k, ok := isSameKind(pat) 298 | if !ok { 299 | return nil, ErrInvalidKind{ 300 | Kind: reflect.Array, 301 | T: identKeyword, 302 | Critical: false, 303 | Msg: "mixed type slice", 304 | } 305 | } 306 | switch k { 307 | case reflect.String: 308 | m, err := NewStringMatcher(mod, false, all, noCollapseWS, castIfaceToString(pat)...) 309 | if err != nil { 310 | return nil, err 311 | } 312 | sel.S = append(sel.S, SelectionStringItem{Key: key, Pattern: m}) 313 | case reflect.Int: 314 | m, err := NewNumMatcher(castIfaceToInt(pat)...) 
315 | if err != nil { 316 | return nil, err 317 | } 318 | sel.N = func() []SelectionNumItem { 319 | item := SelectionNumItem{ 320 | Key: key, Pattern: m, 321 | } 322 | if sel.N == nil { 323 | sel.N = []SelectionNumItem{item} 324 | } 325 | return append(sel.N, item) 326 | }() 327 | default: 328 | return nil, ErrInvalidKind{ 329 | Kind: k, 330 | T: identKeyword, 331 | Critical: false, 332 | Msg: "unsupported data type", 333 | } 334 | } 335 | default: 336 | if t := reflect.TypeOf(pattern); t != nil { 337 | return nil, ErrInvalidKind{ 338 | Kind: t.Kind(), 339 | T: identSelection, 340 | Critical: true, 341 | Msg: "unsupported selection value", 342 | } 343 | } 344 | return nil, ErrUnableToReflect 345 | } 346 | } 347 | return sel, nil 348 | } 349 | 350 | func NewSelectionBranch(expr interface{}, noCollapseWS bool) (Branch, error) { 351 | switch v := expr.(type) { 352 | case []interface{}: 353 | selections := make([]Branch, 0) 354 | for _, item := range v { 355 | b, err := NewSelectionBranch(item, noCollapseWS) 356 | if err != nil { 357 | return nil, err 358 | } 359 | selections = append(selections, b) 360 | } 361 | return NodeSimpleOr(selections).Reduce(), nil 362 | case map[interface{}]interface{}: 363 | return newSelectionFromMap(cleanUpInterfaceMap(v), noCollapseWS) 364 | default: 365 | return nil, ErrInvalidKind{ 366 | Kind: reflect.TypeOf(expr).Kind(), 367 | T: identSelection, 368 | Critical: true, 369 | Msg: "unsupported selection root container", 370 | } 371 | } 372 | } 373 | 374 | func isSameKind(data []interface{}) (reflect.Kind, bool) { 375 | var current, last reflect.Kind 376 | for i, d := range data { 377 | cType := reflect.TypeOf(d) 378 | if cType == nil { 379 | return reflect.Invalid, false 380 | } 381 | current = cType.Kind() 382 | if i > 0 { 383 | if current != last { 384 | return current, false 385 | } 386 | } 387 | last = current 388 | } 389 | return current, true 390 | } 391 | 392 | func castIfaceToString(items []interface{}) []string { 393 | tx := 
make([]string, 0) 394 | for _, val := range items { 395 | tx = append(tx, fmt.Sprintf("%v", val)) 396 | } 397 | return tx 398 | } 399 | 400 | func castIfaceToInt(items []interface{}) []int { 401 | tx := make([]int, 0) 402 | for _, val := range items { 403 | if n, ok := val.(int); ok { 404 | tx = append(tx, n) 405 | } 406 | } 407 | return tx 408 | } 409 | 410 | // Yaml can have non-string keys, so go-yaml unmarshals to map[interface{}]interface{} 411 | // really annoying 412 | func cleanUpInterfaceMap(rx map[interface{}]interface{}) map[string]interface{} { 413 | tx := make(map[string]interface{}) 414 | for k, v := range rx { 415 | tx[fmt.Sprintf("%v", k)] = v 416 | } 417 | return tx 418 | } 419 | 420 | // stats holds various rule statistics 421 | type stats struct { 422 | TypeMismatchCount uint64 423 | } 424 | -------------------------------------------------------------------------------- /ident_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/markuskont/datamodels" 9 | "gopkg.in/yaml.v2" 10 | ) 11 | 12 | type identExampleType int 13 | 14 | const ( 15 | ident1 identExampleType = iota 16 | ident2 17 | ) 18 | 19 | type identPosNegCases struct { 20 | Pos, Neg []Event 21 | } 22 | 23 | type identTestCase struct { 24 | IdentCount int 25 | IdentTypes []identType 26 | Rule string 27 | Pos, Neg []string 28 | 29 | Example identExampleType 30 | } 31 | 32 | func (i identTestCase) sigma() (*identPosNegCases, error) { 33 | posContainer := make([]Event, 0) 34 | negContainer := make([]Event, 0) 35 | switch i.Example { 36 | case ident1: 37 | if i.Pos == nil || len(i.Pos) == 0 { 38 | return nil, fmt.Errorf("missing positive test cases") 39 | } 40 | for _, c := range i.Pos { 41 | var obj simpleKeywordAuditEventExample1 42 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 43 | return nil, err 44 | } 45 | posContainer = 
append(posContainer, obj) 46 | } 47 | for _, c := range i.Neg { 48 | var obj simpleKeywordAuditEventExample1 49 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 50 | return nil, err 51 | } 52 | negContainer = append(negContainer, obj) 53 | } 54 | return &identPosNegCases{Pos: posContainer, Neg: negContainer}, nil 55 | case ident2: 56 | if i.Pos == nil || len(i.Pos) == 0 { 57 | return nil, fmt.Errorf("missing positive test cases") 58 | } 59 | for _, c := range i.Pos { 60 | var obj datamodels.Map 61 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 62 | return nil, err 63 | } 64 | posContainer = append(posContainer, obj) 65 | } 66 | if i.Neg == nil || len(i.Neg) == 0 { 67 | return nil, fmt.Errorf("missing negative test cases") 68 | } 69 | for _, c := range i.Neg { 70 | var obj datamodels.Map 71 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 72 | return nil, err 73 | } 74 | negContainer = append(negContainer, obj) 75 | } 76 | return &identPosNegCases{Pos: posContainer, Neg: negContainer}, nil 77 | } 78 | return nil, fmt.Errorf("Unknown identifier test case") 79 | } 80 | 81 | type simpleKeywordAuditEventExample1 struct { 82 | Command string `json:"cmd"` 83 | } 84 | 85 | // Keywords implements Keyworder 86 | func (s simpleKeywordAuditEventExample1) Keywords() ([]string, bool) { 87 | return []string{s.Command}, true 88 | } 89 | 90 | // Select implements Selector 91 | func (s simpleKeywordAuditEventExample1) Select(_ string) (interface{}, bool) { 92 | return nil, false 93 | } 94 | 95 | var identSelection1 = ` 96 | --- 97 | detection: 98 | condition: selection 99 | selection: 100 | winlog.event_data.ScriptBlockText|contains: 101 | - ' -FromBase64String' 102 | - '::FromBase64String' 103 | ` 104 | 105 | var identSelection1pos1 = ` 106 | { 107 | "event_id": 4104, 108 | "channel": "Microsoft-Windows-PowerShell/Operational", 109 | "task": "Execute a Remote Command", 110 | "opcode": "On create calls", 111 | "version": 1, 112 | "record_id": 1559, 113 | 
"winlog": { 114 | "event_data": { 115 | "MessageNumber": "1", 116 | "MessageTotal": "1", 117 | "ScriptBlockText": "$s=New-Object IO.MemoryStream(,[Convert]::FromBase64String(\"OMITTED BASE64 STRING\"));", 118 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 119 | } 120 | } 121 | } 122 | ` 123 | 124 | // another command 125 | var identSelection1neg1 = ` 126 | { 127 | "event_id": 4104, 128 | "channel": "Microsoft-Windows-PowerShell/Operational", 129 | "task": "Execute a Remote Command", 130 | "opcode": "On create calls", 131 | "version": 1, 132 | "record_id": 1559, 133 | "winlog": { 134 | "event_data": { 135 | "MessageNumber": "1", 136 | "MessageTotal": "1", 137 | "ScriptBlockText": "Some awesome command", 138 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 139 | } 140 | } 141 | } 142 | ` 143 | 144 | // missing field 145 | var identSelection1neg2 = ` 146 | { 147 | "event_id": 4104, 148 | "channel": "Microsoft-Windows-PowerShell/Operational", 149 | "task": "Execute a Remote Command", 150 | "opcode": "On create calls", 151 | "version": 1, 152 | "record_id": 1559, 153 | "winlog": { 154 | "event_data": { 155 | "MessageNumber": "1", 156 | "MessageTotal": "1", 157 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 158 | } 159 | } 160 | } 161 | ` 162 | 163 | var identKeyword1 = ` 164 | --- 165 | detection: 166 | condition: keywords 167 | keywords: 168 | - 'bash -c' 169 | - 'cat /etc/shadow' 170 | ` 171 | 172 | var identKeyword1pos1 = ` 173 | { "cmd": "sudo bash -c \"cat /etc/shadow /etc/group /etc/passwd\"" } 174 | ` 175 | 176 | var identKeyword1neg1 = ` 177 | { "cmd": "sh -c \"cat /etc/resolv.conf\"" } 178 | ` 179 | 180 | var identKeyword2 = ` 181 | --- 182 | detection: 183 | condition: keywords 184 | keywords: 185 | - 'wget * - http* | perl' 186 | - 'wget * - http* | sh' 187 | - 'wget * - http* | bash' 188 | - "*python -m Simple*Server" 189 | ` 190 | 191 | var identKeyword2pos1 = ` 192 | { "cmd": "/usr/bin/python -m SimpleHTTPServer" } 193 | ` 
194 | 195 | var identKeyword2neg1 = ` 196 | { "cmd": "/usr/bin/python -m pip install --user pip" } 197 | ` 198 | 199 | var identKeyword3 = ` 200 | --- 201 | detection: 202 | condition: keywords 203 | keywords: 204 | - '/\S+python.* -m Simple\w+Server.*/' 205 | ` 206 | 207 | var identSelection2 = ` 208 | --- 209 | detection: 210 | condition: selection 211 | selection: 212 | event_id: 213 | - 8888 214 | - 1337 215 | - 13 216 | ` 217 | 218 | var identSelection3 = ` 219 | --- 220 | detection: 221 | condition: selection 222 | selection: 223 | event_id: 1337 224 | ` 225 | 226 | var identSelection2pos1 = ` 227 | { 228 | "event_id": 1337, 229 | "channel": "Microsoft-Windows-PowerShell/Operational", 230 | "task": "Execute a Remote Command", 231 | "opcode": "On create calls", 232 | "version": 1, 233 | "record_id": 1559, 234 | "winlog": { 235 | "event_data": { 236 | "MessageNumber": "1", 237 | "MessageTotal": "1", 238 | "ScriptBlockText": "Some awesome command", 239 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 240 | } 241 | } 242 | } 243 | ` 244 | 245 | var identSelection2neg1 = ` 246 | { 247 | "event_id": 4104, 248 | "channel": "Microsoft-Windows-PowerShell/Operational", 249 | "task": "Execute a Remote Command", 250 | "opcode": "On create calls", 251 | "version": 1, 252 | "record_id": 1559, 253 | "winlog": { 254 | "event_data": { 255 | "MessageNumber": "1", 256 | "MessageTotal": "1", 257 | "ScriptBlockText": "Some awesome command", 258 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 259 | } 260 | } 261 | } 262 | ` 263 | 264 | var identSelection2neg2 = ` 265 | { 266 | "channel": "Microsoft-Windows-PowerShell/Operational", 267 | "task": "Execute a Remote Command", 268 | "opcode": "On create calls", 269 | "version": 1, 270 | "record_id": 1559, 271 | "winlog": { 272 | "event_data": { 273 | "MessageNumber": "1", 274 | "MessageTotal": "1", 275 | "ScriptBlockText": "Some awesome command", 276 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 277 | } 
278 | } 279 | } 280 | ` 281 | 282 | var selectionCases = []identTestCase{ 283 | { 284 | IdentCount: 1, 285 | Rule: identSelection1, 286 | IdentTypes: []identType{identSelection}, 287 | Pos: []string{identSelection1pos1}, 288 | Neg: []string{identSelection1neg1, identSelection1neg2}, 289 | Example: ident2, 290 | }, 291 | { 292 | IdentCount: 1, 293 | Rule: identSelection2, 294 | IdentTypes: []identType{identSelection}, 295 | Pos: []string{identSelection2pos1}, 296 | Neg: []string{identSelection2neg1, identSelection2neg2}, 297 | Example: ident2, 298 | }, 299 | { 300 | IdentCount: 1, 301 | Rule: identSelection3, 302 | IdentTypes: []identType{identSelection}, 303 | Pos: []string{identSelection2pos1}, 304 | Neg: []string{identSelection2neg1, identSelection2neg2}, 305 | Example: ident2, 306 | }, 307 | } 308 | 309 | var keywordCases = []identTestCase{ 310 | { 311 | IdentCount: 1, 312 | Rule: identKeyword1, 313 | IdentTypes: []identType{identKeyword}, 314 | Pos: []string{identKeyword1pos1}, 315 | Neg: []string{identKeyword1neg1}, 316 | Example: ident1, 317 | }, 318 | { 319 | IdentCount: 1, 320 | Rule: identKeyword2, 321 | IdentTypes: []identType{identKeyword}, 322 | Pos: []string{identKeyword2pos1}, 323 | Neg: []string{identKeyword2neg1}, 324 | Example: ident1, 325 | }, 326 | { 327 | IdentCount: 1, 328 | Rule: identKeyword3, 329 | IdentTypes: []identType{identKeyword}, 330 | Pos: []string{identKeyword2pos1}, 331 | Neg: []string{identKeyword2neg1}, 332 | Example: ident1, 333 | }, 334 | } 335 | 336 | var identCases = append(keywordCases, selectionCases...) 
337 | 338 | func TestParseIdent(t *testing.T) { 339 | for i, c := range identCases { 340 | var r Rule 341 | if err := yaml.Unmarshal([]byte(c.Rule), &r); err != nil { 342 | t.Fatalf("ident case %d yaml parse fail: %s", i+1, err) 343 | } 344 | condition, ok := r.Detection["condition"].(string) 345 | if !ok { 346 | t.Fatalf("ident case %d missing condition", i+1) 347 | } 348 | l := lex(condition) 349 | var items, j int 350 | keywords := make([]Matcher, 0) 351 | selections := make([]Matcher, 0) 352 | for item := range l.items { 353 | switch item.T { 354 | case TokIdentifier: 355 | val, ok := r.Detection[item.Val] 356 | if !ok { 357 | t.Fatalf("ident case %d missing ident %s or unable to extract", i+1, item.Val) 358 | } 359 | items++ 360 | if k := checkIdentType(item.Val, val); k != c.IdentTypes[j] { 361 | t.Fatalf("ident case %d ident %d kind mismatch expected %s got %s", 362 | i+1, j+1, c.IdentTypes[j], k) 363 | } 364 | switch c.IdentTypes[j] { 365 | case identKeyword: 366 | kw, err := NewKeyword(val, false) 367 | if err != nil { 368 | t.Fatalf("ident case %d token %d failed to parse as keyword: %s", 369 | i+1, j+1, err) 370 | } 371 | keywords = append(keywords, kw) 372 | case identSelection: 373 | sel, err := NewSelectionBranch(val, false) 374 | if err != nil { 375 | t.Fatalf("ident case %d token %d failed to parse as selection: %s", 376 | i+1, j+1, err) 377 | } 378 | selections = append(selections, sel) 379 | } 380 | j++ 381 | } 382 | } 383 | if items != c.IdentCount { 384 | t.Fatalf("ident case %d defined element count %d does not match processd %d", 385 | i+1, c.IdentCount, items) 386 | } 387 | cases, err := c.sigma() 388 | if err != nil { 389 | t.Fatalf("ident case %d unable to cast test cases to sigma events, err: %s", 390 | i+1, err) 391 | } 392 | for _, rule := range keywords { 393 | if rule == nil { 394 | t.Fatalf("ident case %d nil rule pointer", i+1) 395 | } 396 | for j, c := range cases.Pos { 397 | m, _ := rule.Match(c) 398 | if !m { 399 | t.Fatalf("ident 
case %d positive test case %d did not match %s", 400 | i+1, j+1, c) 401 | } 402 | } 403 | for j, c := range cases.Neg { 404 | m, _ := rule.Match(c) 405 | if m { 406 | t.Fatalf("ident case %d negative test case %d did not match %s", 407 | i+1, j+1, c) 408 | } 409 | } 410 | } 411 | for _, rule := range selections { 412 | if rule == nil { 413 | t.Fatalf("ident case %d nil rule pointer", i+1) 414 | } 415 | for j, c := range cases.Pos { 416 | m, _ := rule.Match(c) 417 | if !m { 418 | t.Fatalf("ident case %d positive test case %d did not match %s", 419 | i+1, j+1, c) 420 | } 421 | } 422 | for j, c := range cases.Neg { 423 | m, _ := rule.Match(c) 424 | if m { 425 | t.Fatalf("ident case %d negative test case %d did not match %s", 426 | i+1, j+1, c) 427 | } 428 | } 429 | } 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /lexer.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "unicode" 7 | "unicode/utf8" 8 | ) 9 | 10 | type lexer struct { 11 | input string // we'll store the string being parsed 12 | start int // the position we started scanning 13 | position int // the current position of our scan 14 | width int // we'll be using runes which can be double byte 15 | items chan Item // the channel we'll use to communicate between the lexer and the parser 16 | } 17 | 18 | // lex creates a lexer and starts scanning the provided input. 19 | func lex(input string) *lexer { 20 | l := &lexer{ 21 | input: input, 22 | items: make(chan Item), // unbuffered 23 | } 24 | go l.scan() 25 | return l 26 | } 27 | 28 | // ignore resets the start position to the current scan position effectively 29 | // ignoring any input. 30 | func (l *lexer) ignore() { 31 | l.start = l.position 32 | } 33 | 34 | // next advances the lexer state to the next rune. 
35 | func (l *lexer) next() (r rune) { 36 | if l.position >= len(l.input) { 37 | l.width = 0 38 | return eof 39 | } 40 | 41 | r, l.width = utf8.DecodeRuneInString(l.todo()) 42 | l.position += l.width 43 | return r 44 | } 45 | 46 | // backup allows us to step back one rune which is helpful when you've crossed 47 | // a boundary from one state to another. 48 | func (l *lexer) backup() { 49 | l.position = l.position - 1 50 | } 51 | 52 | // scan will step through the provided text and execute state functions as 53 | // state changes are observed in the provided input. 54 | func (l *lexer) scan() { 55 | // When we begin processing, let's assume we're going to process text. 56 | // One state function will return another until `nil` is returned to signal 57 | // the end of our process. 58 | for fn := lexCondition; fn != nil; { 59 | fn = fn(l) 60 | } 61 | close(l.items) 62 | } 63 | 64 | func (l *lexer) unsuppf(format string, args ...interface{}) stateFn { 65 | msg := fmt.Sprintf(format, args...) 66 | l.items <- Item{T: TokUnsupp, Val: msg} 67 | return nil 68 | } 69 | 70 | func (l *lexer) errorf(format string, args ...interface{}) stateFn { 71 | msg := fmt.Sprintf(format, args...) 72 | l.items <- Item{T: TokErr, Val: msg} 73 | return nil 74 | } 75 | 76 | // emit sends a item over the channel so the parser can collect and manage 77 | // each segment. 78 | func (l *lexer) emit(k Token) { 79 | i := Item{T: k, Val: l.input[l.start:l.position]} 80 | l.items <- i 81 | l.ignore() // reset our scanner now that we've dispatched a segment 82 | } 83 | 84 | func (l lexer) collected() string { return l.input[l.start:l.position] } 85 | func (l lexer) todo() string { return l.input[l.position:] } 86 | 87 | // stateFn is a function that is specific to a state within the string. 88 | type stateFn func(*lexer) stateFn 89 | 90 | // lexCondition scans what is expected to be text. 
91 | func lexCondition(l *lexer) stateFn { 92 | for { 93 | if strings.HasPrefix(l.todo(), TokStOne.Literal()) { 94 | return lexOneOf 95 | } 96 | if strings.HasPrefix(l.todo(), TokStAll.Literal()) { 97 | return lexAllOf 98 | } 99 | switch r := l.next(); { 100 | case r == eof: 101 | return lexEOF 102 | case r == TokSepRpar.Rune(): 103 | return lexRparWithTokens 104 | case r == TokSepLpar.Rune(): 105 | return lexLpar 106 | case r == TokSepPipe.Rune(): 107 | return lexPipe 108 | case unicode.IsSpace(r): 109 | return lexAccumulateBeforeWhitespace 110 | } 111 | } 112 | } 113 | 114 | func lexStatement(l *lexer) stateFn { 115 | return lexCondition 116 | } 117 | 118 | func lexOneOf(l *lexer) stateFn { 119 | l.position += len(TokStOne.Literal()) 120 | l.emit(TokStOne) 121 | return lexCondition 122 | } 123 | 124 | func lexAllOf(l *lexer) stateFn { 125 | l.position += len(TokStAll.Literal()) 126 | l.emit(TokStAll) 127 | return lexCondition 128 | } 129 | 130 | func lexAggs(l *lexer) stateFn { 131 | return l.unsuppf("aggregation not supported yet [%s]", l.input) 132 | } 133 | 134 | func lexEOF(l *lexer) stateFn { 135 | if l.position > l.start { 136 | l.emit(checkKeyWord(l.collected())) 137 | } 138 | l.emit(TokLitEof) 139 | return nil 140 | } 141 | 142 | func lexPipe(l *lexer) stateFn { 143 | l.emit(TokSepPipe) 144 | return lexAggs 145 | } 146 | 147 | func lexLpar(l *lexer) stateFn { 148 | l.emit(TokSepLpar) 149 | return lexCondition 150 | } 151 | 152 | func lexRparWithTokens(l *lexer) stateFn { 153 | // emit any text we've accumulated. 
154 | if l.position > l.start { 155 | l.backup() 156 | // There may be N whitespace chars between token RPAR 157 | // TODO - may be a more concise way to do this, right now loops like this are everywhere 158 | 159 | if t := checkKeyWord(l.collected()); t != TokNil { 160 | l.emit(t) 161 | } 162 | 163 | for { 164 | switch r := l.next(); { 165 | case r == eof: 166 | return lexEOF 167 | case unicode.IsSpace(r): 168 | l.ignore() 169 | default: 170 | return lexRpar 171 | } 172 | } 173 | } 174 | return lexRpar 175 | } 176 | 177 | func lexRpar(l *lexer) stateFn { 178 | l.emit(TokSepRpar) 179 | return lexCondition 180 | } 181 | 182 | func lexAccumulateBeforeWhitespace(l *lexer) stateFn { 183 | l.backup() 184 | // emit any text we've accumulated. 185 | if l.position > l.start { 186 | l.emit(checkKeyWord(l.collected())) 187 | } 188 | return lexWhitespace 189 | } 190 | 191 | // lexWhitespace scans what is expected to be whitespace. 192 | func lexWhitespace(l *lexer) stateFn { 193 | for { 194 | switch r := l.next(); { 195 | case r == eof: 196 | return lexEOF 197 | case !unicode.IsSpace(r): 198 | l.backup() 199 | return lexCondition 200 | default: 201 | l.ignore() 202 | } 203 | } 204 | } 205 | 206 | func checkKeyWord(in string) Token { 207 | if len(in) == 0 { 208 | return TokNil 209 | } 210 | switch strings.ToLower(in) { 211 | case TokKeywordAnd.Literal(): 212 | return TokKeywordAnd 213 | case TokKeywordOr.Literal(): 214 | return TokKeywordOr 215 | case TokKeywordNot.Literal(): 216 | return TokKeywordNot 217 | case "sum", "min", "max", "count", "avg": 218 | return TokKeywordAgg 219 | case TokIdentifierAll.Literal(): 220 | return TokIdentifierAll 221 | case TokStOne.Literal(): 222 | return TokStOne 223 | default: 224 | if strings.Contains(in, "*") { 225 | return TokIdentifierWithWildcard 226 | } 227 | return TokIdentifier 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /lexer_test.go: 
-------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import "testing" 4 | 5 | type LexTestCase struct { 6 | Expr string 7 | Tokens []Token 8 | } 9 | 10 | var LexPosCases = []LexTestCase{ 11 | { 12 | Expr: "selection", 13 | Tokens: []Token{TokIdentifier, TokLitEof}, 14 | }, 15 | { 16 | Expr: "selection_1 and not filter_0", 17 | Tokens: []Token{ 18 | TokIdentifier, TokKeywordAnd, TokKeywordNot, TokIdentifier, TokLitEof, 19 | }, 20 | }, 21 | { 22 | Expr: "((selection_1 and not filter_0) OR (keyword_0 and not filter1)) or idontcare", 23 | Tokens: []Token{ 24 | TokSepLpar, TokSepLpar, TokIdentifier, TokKeywordAnd, TokKeywordNot, TokIdentifier, 25 | TokSepRpar, TokKeywordOr, TokSepLpar, TokIdentifier, TokKeywordAnd, TokKeywordNot, 26 | TokIdentifier, TokSepRpar, TokSepRpar, TokKeywordOr, TokIdentifier, TokLitEof, 27 | }, 28 | }, 29 | { 30 | Expr: "all of selection* and not 1 of filter* | count() > 10", 31 | Tokens: []Token{ 32 | TokStAll, TokIdentifierWithWildcard, TokKeywordAnd, TokKeywordNot, TokStOne, 33 | TokIdentifierWithWildcard, TokSepPipe, TokUnsupp, TokIdentifier, TokLitEof, 34 | }, 35 | }, 36 | } 37 | 38 | func TestLex(t *testing.T) { 39 | for j, c := range LexPosCases { 40 | l := lex(c.Expr) 41 | var i int 42 | for item := range l.items { 43 | if item.T != c.Tokens[i] { 44 | t.Fatalf( 45 | "lex case %d expr %s failed on item %d expected %s got %s", 46 | j, c.Expr, i, c.Tokens[i].String(), item.T.String()) 47 | } 48 | i++ 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /nodes.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | // NodeSimpleAnd is a list of matchers connected with logical conjunction 4 | type NodeSimpleAnd []Branch 5 | 6 | // Match implements Matcher 7 | func (n NodeSimpleAnd) Match(e Event) (bool, bool) { 8 | for _, b := range n { 9 | match, applicable := b.Match(e) 10 | if 
!match || !applicable { 11 | return match, applicable 12 | } 13 | } 14 | return true, true 15 | } 16 | 17 | // Reduce cleans up unneeded slices 18 | // Static structures can be used if node only holds one or two elements 19 | // Avoids pointless runtime loops 20 | func (n NodeSimpleAnd) Reduce() Branch { 21 | if len(n) == 1 { 22 | return n[0] 23 | } 24 | if len(n) == 2 { 25 | return &NodeAnd{L: n[0], R: n[1]} 26 | } 27 | return n 28 | } 29 | 30 | // NodeSimpleOr is a list of matchers connected with logical disjunction 31 | type NodeSimpleOr []Branch 32 | 33 | // Reduce cleans up unneeded slices 34 | // Static structures can be used if node only holds one or two elements 35 | // Avoids pointless runtime loops 36 | func (n NodeSimpleOr) Reduce() Branch { 37 | if len(n) == 1 { 38 | return n[0] 39 | } 40 | if len(n) == 2 { 41 | return &NodeOr{L: n[0], R: n[1]} 42 | } 43 | return n 44 | } 45 | 46 | // Match implements Matcher 47 | func (n NodeSimpleOr) Match(e Event) (bool, bool) { 48 | var oneApplicable bool 49 | for _, b := range n { 50 | match, applicable := b.Match(e) 51 | if match { 52 | return true, true 53 | } 54 | if applicable { 55 | oneApplicable = true 56 | } 57 | } 58 | return false, oneApplicable 59 | } 60 | 61 | // NodeNot negates a branch 62 | type NodeNot struct { 63 | B Branch 64 | } 65 | 66 | // Match implements Matcher 67 | func (n NodeNot) Match(e Event) (bool, bool) { 68 | match, applicable := n.B.Match(e) 69 | if !applicable { 70 | return match, applicable 71 | } 72 | return !match, applicable 73 | } 74 | 75 | // NodeAnd is a two element node of a binary tree with Left and Right branches 76 | // connected via logical conjunction 77 | type NodeAnd struct { 78 | L, R Branch 79 | } 80 | 81 | // Match implements Matcher 82 | func (n NodeAnd) Match(e Event) (bool, bool) { 83 | lMatch, lApplicable := n.L.Match(e) 84 | if !lMatch { 85 | return false, lApplicable 86 | } 87 | rMatch, rApplicable := n.R.Match(e) 88 | return lMatch && rMatch, lApplicable && 
rApplicable 89 | } 90 | 91 | // NodeOr is a two element node of a binary tree with Left and Right branches 92 | // connected via logical disjunction 93 | type NodeOr struct { 94 | L, R Branch 95 | } 96 | 97 | // Match implements Matcher 98 | func (n NodeOr) Match(e Event) (bool, bool) { 99 | lMatch, lApplicable := n.L.Match(e) 100 | if lMatch { 101 | return true, lApplicable 102 | } 103 | rMatch, rApplicable := n.R.Match(e) 104 | return lMatch || rMatch, lApplicable || rApplicable 105 | } 106 | 107 | func newNodeNotIfNegated(b Branch, negated bool) Branch { 108 | if negated { 109 | return &NodeNot{B: b} 110 | } 111 | return b 112 | } 113 | 114 | // TODO - use these functions to create binary trees instead of dunamic slices 115 | func newConjunction(s NodeSimpleAnd) Branch { 116 | if l := len(s); l == 1 || l == 2 { 117 | return s.Reduce() 118 | } 119 | return &NodeAnd{ 120 | L: s[0], 121 | R: newConjunction(s[1:]), 122 | } 123 | } 124 | 125 | func newDisjunction(s NodeSimpleOr) Branch { 126 | if l := len(s); l == 1 || l == 2 { 127 | return s.Reduce() 128 | } 129 | return &NodeOr{ 130 | L: s[0], 131 | R: newDisjunction(s[1:]), 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type parser struct { 8 | // lexer that tokenizes input string 9 | lex *lexer 10 | 11 | // container for collected tokens and their values 12 | tokens []Item 13 | 14 | // memorize last token to validate proper sequence 15 | // for example, two identifiers have to be joined via logical AND or OR, otherwise the sequence is invalid 16 | previous Item 17 | 18 | // sigma detection map that contains condition query and relevant fields 19 | sigma Detection 20 | 21 | // for debug 22 | condition string 23 | 24 | // resulting rule that can be collected later 25 | result Branch 26 | 27 | // if true, stops 
the parser from collapsing whitespace in non-regex rules (default is false to collapse) 28 | // and the data that will be matched against them; default is to collapse whitespace to allow for better 29 | // matching in the event that a bad actor attempts to pad whitespace inot a command to fool the engine 30 | noCollapseWS bool 31 | } 32 | 33 | func (p *parser) run() error { 34 | if p.lex == nil { 35 | return fmt.Errorf("cannot run condition parser, lexer not initialized") 36 | } 37 | // Pass 1: collect tokens, do basic sequence validation and collect sigma fields 38 | if err := p.collect(); err != nil { 39 | return err 40 | } 41 | // Pass 2: find groups 42 | if err := p.parse(); err != nil { 43 | return err 44 | } 45 | return nil 46 | } 47 | 48 | func (p *parser) parse() error { 49 | res, err := newBranch(p.sigma, p.tokens, 0, p.noCollapseWS) 50 | if err != nil { 51 | return err 52 | } 53 | p.result = res 54 | return nil 55 | } 56 | 57 | // collect gathers all items from lexer and does preliminary sequence validation 58 | func (p *parser) collect() error { 59 | for item := range p.lex.items { 60 | if item.T == TokUnsupp { 61 | return ErrUnsupportedToken{Msg: item.Val} 62 | } 63 | if p.previous.T != TokBegin && !validTokenSequence(p.previous.T, item.T) { 64 | return ErrInvalidTokenSeq{ 65 | Prev: p.previous, 66 | Next: item, 67 | Collected: p.tokens, 68 | } 69 | } 70 | if item.T != TokLitEof { 71 | p.tokens = append(p.tokens, item) 72 | } 73 | p.previous = item 74 | } 75 | if p.previous.T != TokLitEof { 76 | return ErrIncompleteTokenSeq{ 77 | Expression: p.condition, 78 | Items: p.tokens, 79 | Last: p.previous, 80 | } 81 | } 82 | return nil 83 | } 84 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/gobwas/glob" 7 | "gopkg.in/yaml.v2" 8 | ) 9 | 10 | var detection1 = ` 
11 | detection: 12 | condition: "selection1 and not selection3" 13 | selection1: 14 | Image: 15 | - '*\schtasks.exe' 16 | - '*\nslookup.exe' 17 | - '*\certutil.exe' 18 | - '*\bitsadmin.exe' 19 | - '*\mshta.exe' 20 | ParentImage: 21 | - '*\mshta.exe' 22 | - '*\powershell.exe' 23 | - '*\cmd.exe' 24 | - '*\rundll32.exe' 25 | - '*\cscript.exe' 26 | - '*\wscript.exe' 27 | - '*\wmiprvse.exe' 28 | selection3: 29 | CommandLine: "+R +H +S +A *.cui" 30 | ` 31 | 32 | var detection1_positive = ` 33 | { 34 | "Image": "C:\\test\\bitsadmin.exe", 35 | "CommandLine": "+R +H +A asd.cui", 36 | "ParentImage": "C:\\test\\wmiprvse.exe", 37 | "Image": "C:\\test\\bitsadmin.exe", 38 | "CommandLine": "aaa", 39 | "ParentImage": "C:\\test\\wmiprvse.exe" 40 | } 41 | ` 42 | 43 | var detection1_negative1 = ` 44 | { 45 | "Image": "C:\\test\\bitsadmin.exe", 46 | "CommandLine": "+R +H +S +A lll.cui", 47 | "ParentImage": "C:\\test\\mshta.exe" 48 | } 49 | ` 50 | 51 | var detection1_negative2 = ` 52 | { 53 | "Image": "C:\\test\\bitsadmin.exe", 54 | "CommandLine": "+R +H +S +A lll.cui" 55 | } 56 | ` 57 | 58 | var detection2 = ` 59 | detection: 60 | condition: "(selection1 and selection2) and not selection3" 61 | selection1: 62 | Image: 63 | - '*\schtasks.exe' 64 | - '*\nslookup.exe' 65 | - '*\certutil.exe' 66 | - '*\bitsadmin.exe' 67 | - '*\mshta.exe' 68 | selection2: 69 | ParentImage: 70 | - '*\mshta.exe' 71 | - '*\powershell.exe' 72 | - '*\cmd.exe' 73 | - '*\rundll32.exe' 74 | - '*\cscript.exe' 75 | - '*\wscript.exe' 76 | - '*\wmiprvse.exe' 77 | selection3: 78 | CommandLine: "+R +H +S +A *.cui" 79 | ` 80 | 81 | var detection3 = ` 82 | detection: 83 | condition: "(selection1 or selection2) and not selection3" 84 | selection1: 85 | Image: 86 | - '*\schtasks.exe' 87 | - '*\nslookup.exe' 88 | - '*\certutil.exe' 89 | - '*\bitsadmin.exe' 90 | - '*\mshta.exe' 91 | selection2: 92 | ParentImage: 93 | - '*\mshta.exe' 94 | - '*\powershell.exe' 95 | - '*\cmd.exe' 96 | - '*\rundll32.exe' 97 | - '*\cscript.exe' 98 
| - '*\wscript.exe' 99 | - '*\wmiprvse.exe' 100 | selection3: 101 | CommandLine: "+R +H +S +A *.cui" 102 | ` 103 | 104 | var detection3_positive1 = ` 105 | { 106 | "Image": "C:\\test\\bitsadmin.exe", 107 | "CommandLine": "+R +H +A asd.cui", 108 | "ParentImage": "C:\\test\\custom.exe", 109 | "Image": "C:\\test\\bitsadmin.exe", 110 | "CommandLine": "aaa", 111 | "ParentImage": "C:\\test\\wmiprvse.exe" 112 | } 113 | ` 114 | 115 | var detection3_positive2 = ` 116 | { 117 | "Image": "C:\\test\\custom.exe", 118 | "CommandLine": "+R +H +A asd.cui", 119 | "ParentImage": "C:\\test\\wmiprvse.exe", 120 | "Image": "C:\\test\\bitsadmin.exe", 121 | "CommandLine": "aaa", 122 | "ParentImage": "C:\\test\\wmiprvse.exe" 123 | } 124 | ` 125 | 126 | var detection3_negative = ` 127 | { 128 | "Image": "C:\\test\\bitsadmin.exe", 129 | "CommandLine": "+R +H +S +A lll.cui", 130 | "ParentImage": "C:\\test\\mshta.exe" 131 | } 132 | ` 133 | 134 | var detection4 = ` 135 | detection: 136 | condition: "all of selection* and not filter" 137 | selection1: 138 | Image: 139 | - '*\schtasks.exe' 140 | - '*\nslookup.exe' 141 | - '*\certutil.exe' 142 | - '*\bitsadmin.exe' 143 | - '*\mshta.exe' 144 | selection2: 145 | ParentImage: 146 | - '*\mshta.exe' 147 | - '*\powershell.exe' 148 | - '*\cmd.exe' 149 | - '*\rundll32.exe' 150 | - '*\cscript.exe' 151 | - '*\wscript.exe' 152 | - '*\wmiprvse.exe' 153 | filter: 154 | CommandLine: "+R +H +S +A *.cui" 155 | ` 156 | 157 | var detection5 = ` 158 | detection: 159 | condition: "1 of selection* and not filter" 160 | selection1: 161 | Image: 162 | - '*\schtasks.exe' 163 | - '*\nslookup.exe' 164 | - '*\certutil.exe' 165 | - '*\bitsadmin.exe' 166 | - '*\mshta.exe' 167 | selection2: 168 | ParentImage: 169 | - '*\mshta.exe' 170 | - '*\powershell.exe' 171 | - '*\cmd.exe' 172 | - '*\rundll32.exe' 173 | - '*\cscript.exe' 174 | - '*\wscript.exe' 175 | - '*\wmiprvse.exe' 176 | filter: 177 | CommandLine: "+R +H +S +A *.cui" 178 | ` 179 | 180 | var detection6 = ` 181 | 
detection: 182 | condition: "all of them" 183 | selection_images: 184 | Image: 185 | - '*\schtasks.exe' 186 | - '*\nslookup.exe' 187 | - '*\certutil.exe' 188 | - '*\bitsadmin.exe' 189 | - '*\mshta.exe' 190 | selection_parent_images: 191 | ParentImage: 192 | - '*\mshta.exe' 193 | - '*\powershell.exe' 194 | - '*\cmd.exe' 195 | - '*\rundll32.exe' 196 | - '*\cscript.exe' 197 | - '*\wscript.exe' 198 | - '*\wmiprvse.exe' 199 | ` 200 | 201 | var detection6_positive = ` 202 | { 203 | "Image": "C:\\test\\bitsadmin.exe", 204 | "CommandLine": "+R +H +A asd.cui", 205 | "ParentImage": "C:\\test\\wmiprvse.exe", 206 | "Image": "C:\\test\\bitsadmin.exe", 207 | "CommandLine": "aaa", 208 | "ParentImage": "C:\\test\\wmiprvse.exe" 209 | } 210 | ` 211 | 212 | var detection6_negative = ` 213 | { 214 | "Image": "C:\\test\\bitsadmin.exe", 215 | "CommandLine": "+R +H +S +A lll.cui", 216 | "ParentImage": "C:\\test\\mshta\\lll.exe" 217 | } 218 | ` 219 | 220 | var detection7 = ` 221 | detection: 222 | condition: "1 of them" 223 | selection_images: 224 | Image: 225 | - '*\schtasks.exe' 226 | - '*\nslookup.exe' 227 | - '*\certutil.exe' 228 | - '*\bitsadmin.exe' 229 | - '*\mshta.exe' 230 | selection_parent_images: 231 | ParentImage: 232 | - '*\mshta.exe' 233 | - '*\powershell.exe' 234 | - '*\cmd.exe' 235 | - '*\rundll32.exe' 236 | - '*\cscript.exe' 237 | - '*\wscript.exe' 238 | - '*\wmiprvse.exe' 239 | ` 240 | 241 | var detection7_negative1 = ` 242 | { 243 | "Image": "C:\\test\\bytesadmin.exe", 244 | "CommandLine": "+R +H +S +A lll.cui", 245 | "ParentImage": "E:\\go\\bin\\gofmt" 246 | } 247 | ` 248 | 249 | var detection7_negative2 = ` 250 | { 251 | "Image": "C:\\test\\bytesadmin.exe", 252 | "CommandLine": "+R +H +S +A lll.cui" 253 | } 254 | ` 255 | 256 | var detection8 = ` 257 | detection: 258 | condition: "selection1 and not selection3" 259 | selection1: 260 | Image: 261 | - '*\schtasks.exe' 262 | - '*\nslookup.exe' 263 | - '*\certutil.exe' 264 | - '*\bitsadmin.exe' 265 | - '*\mshta.exe' 266 | 
ParentImage: 267 | - '*\mshta.exe' 268 | - '*\powershell.exe' 269 | - '*\cmd.exe' 270 | - '*\rundll32.exe' 271 | - '*\cscript.exe' 272 | - '*\wscript.exe' 273 | - '*\wmiprvse.exe' 274 | selection3: 275 | CommandLine: "+R +H +S +A *.cui" 276 | ` 277 | 278 | var detection8_positive = ` 279 | { 280 | "Image": "C:\\test\\bitsadmin.exe", 281 | "CommandLine": "+R +H +A asd.cui", 282 | "ParentImage": "C:\\test\\wmiprvse.exe", 283 | "Image": "C:\\test\\bitsadmin.exe", 284 | "CommandLine": "aaa", 285 | "ParentImage": "C:\\test\\wmiprvse.exe" 286 | } 287 | ` 288 | 289 | var detection8_negative1 = ` 290 | { 291 | "Image": "C:\\test\\bitsadmin.exe", 292 | "CommandLine": "+R +H +S +A lll.cui", 293 | "ParentImage": "C:\\test\\mshta.exe" 294 | } 295 | ` 296 | 297 | var detection8_negative2 = ` 298 | { 299 | "Image": "C:\\test\\bitsadmin.exe", 300 | "ParentImage": "C:\\test\\mshta.exe" 301 | } 302 | ` 303 | 304 | var detection9 = ` 305 | detection: 306 | condition: "selection" 307 | selection: 308 | - PipeName|re: '\\\\SomePipeName[0-9a-f]{2}' 309 | - PipeName2|re: '\\\\AnotherPipe[0-9a-f]*Name' 310 | ` 311 | 312 | var detection9_positive = ` 313 | { 314 | "PipeName": "\\\\SomePipeNamea4", 315 | "PipeName2": "\\\\AnotherPipe0af3Name" 316 | } 317 | ` 318 | 319 | var detection9_negative = ` 320 | { 321 | "PipeName": "\\\\SomePipeNameZZ", 322 | "PipeName2": "\\\\AnotherPipe01zzName" 323 | } 324 | ` 325 | 326 | var detection10 = ` 327 | detection: 328 | condition: "selection1 and selection2" 329 | selection1: 330 | - SomeName|startswith: 'TestStart' 331 | selection2: 332 | - SomeName|endswith: 'TestEnd' 333 | ` 334 | 335 | var detection10_positive = ` 336 | { 337 | "SomeName": "TestStart-Value-TestEnd" 338 | } 339 | ` 340 | 341 | var detection10_negative = ` 342 | { 343 | "SomeName": "TestStart-Value" 344 | } 345 | ` 346 | 347 | var detection11 = ` 348 | detection: 349 | condition: "selection1 and selection2" 350 | selection1: 351 | SomeName|contains|all: 352 | - 'mark1' 353 | - 
'mark2' 354 | selection2: 355 | SomeName|contains: 356 | - 'version1' 357 | - 'version2' 358 | ` 359 | 360 | var detection11_positive = ` 361 | { 362 | "SomeName": "Some mark1 mark2 String version2" 363 | } 364 | ` 365 | 366 | var detection11_negative = ` 367 | { 368 | "SomeName": "mark1 mark2 mark3 non-matching string" 369 | } 370 | ` 371 | 372 | var detection12 = ` 373 | detection: 374 | condition: "selection1 and selection2" 375 | selection1: 376 | SomeKey|contains|all: 377 | - 'val1' 378 | - 'val2' 379 | selection2: 380 | SomeKey2: 381 | - 'mustMatch1' 382 | - 'mustMatch2' 383 | ` 384 | 385 | var detection12_positive = ` 386 | { 387 | "SomeKey": "val1 val2", 388 | "SomeKey2": "mustMatch1" 389 | } 390 | ` 391 | 392 | var detection12_negative = ` 393 | { 394 | "SomeKey": "val1 val2", 395 | "SomeKey2": "mustMatch3" 396 | } 397 | ` 398 | 399 | // this test is a bit tricky: 400 | // the '*\bits\*admin.exe' is looking to match '[wildCard]\bits*admin.exe' (one wildcard at head, one escaped wildcard) 401 | // the '\\\\DoubleBackslash\\some*.exe' is looking to match '\\DoubleBackslash\some[wildCard].exe' (multiple backslashes, one wildcard) 402 | // the '\leadingBackslash\\*.exe' is looking to match '\leadingBackslash\[wildCard].exe' (one wildcard and leading backslash) 403 | // the 'full\\\*plaintext.exe' is looking to match 'full\*plaintext.exe' (no wildcards exact match) 404 | var detection13 = ` 405 | detection: 406 | condition: "all of them" 407 | selection_images: 408 | Image: 409 | - '*\bits\*admin.exe' 410 | - '\\\\DoubleBackslash\\some*.exe' 411 | - '[Windows-*]\image.???' 
412 | selection_parent_images: 413 | ParentImage: 414 | - '\leadingBackslash\\*.exe' 415 | - 'full\\\*plaintext.exe' 416 | - '{000-aaa-*}\\*.exe' 417 | ` 418 | 419 | var detection13_positive = ` 420 | { 421 | "Image": "C:\\test\\bits*admin.exe", 422 | "ParentImage": "\\leadingBackslash\\something.exe" 423 | } 424 | ` 425 | 426 | var detection13_positive2 = ` 427 | { 428 | "Image": "\\\\DoubleBackslash\\someOther.exe", 429 | "ParentImage": "full\\*plaintext.exe" 430 | } 431 | ` 432 | 433 | var detection13_positive3 = ` 434 | { 435 | "Image": "C:\\test\\bits*admin.exe", 436 | "ParentImage": "full\\*plaintext.exe" 437 | } 438 | ` 439 | 440 | var detection13_positive4 = ` 441 | { 442 | "Image": "[Windows-Security]\\image.cmd", 443 | "ParentImage": "{000-aaa-123}\\evil.exe" 444 | } 445 | ` 446 | 447 | // won't match as Image is looking for '*\bits*admin.exe' witha leading wildcard and an escaped '*' between bits and admin 448 | // this provides 'C:\test\bitsadmin.exe', which matches the leading wildcard but fails to present the escaped '*' 449 | var detection13_negative = ` 450 | { 451 | "Image": "C:\\test\\bitsadmin.exe", 452 | "ParentImage": "\\leadingBackslash\\something.exe" 453 | } 454 | ` 455 | 456 | // won't match as the ParentImage is looking for '\leadingBackslash\*.exe' with a wildcard 457 | // this provides 'leadingBackslash\something.exe', missing the leading backslash 458 | var detection13_negative2 = ` 459 | { 460 | "Image": "C:\\test\\bits*admin.exe", 461 | "ParentImage": "leadingBackslash\\something.exe" 462 | } 463 | ` 464 | 465 | // won't match as the ParentImage is looking for an exact match (no wildcards) to 'full\*plaintext.exe' 466 | // this provides 'full\\*plaintext', the extra backslash kills it 467 | var detection13_negative3 = ` 468 | { 469 | "Image": "C:\\test\\bits*admin.exe", 470 | "ParentImage": "full\\\\*plaintext" 471 | } 472 | ` 473 | 474 | // shouldn't match on either of these (Image is missing 'Windows' in the bracket, ParentImage is 
missing the 475 | // a vaule of 000-aaa in the brackets) 476 | var detection13_negative4 = ` 477 | { 478 | "Image": "[-Security]\\image.cmd", 479 | "ParentImage": "{000-aaa}\\evil.exe" 480 | } 481 | ` 482 | 483 | // this has a hacky test; we set 'noCollapseWSNeg' in the parseTestCast struct for this case specifically 484 | // doing so will turn off collapsing the whitespace for the negative test and cause this to fail detection 485 | var detection14 = ` 486 | detection: 487 | condition: "selection" 488 | selection: 489 | SomeName|contains: 490 | - 'whitespace collapse testing' 491 | ` 492 | 493 | var detection14_case = ` 494 | { 495 | "SomeName": "whitespace\t\tcollapse testing" 496 | } 497 | ` 498 | 499 | var detection15 = ` 500 | detection: 501 | condition: "all of selection_* and 1 of option_*" 502 | selection_images: 503 | Image: 504 | - '*bits*admin.exe' 505 | selection_parent_images: 506 | ParentImage: 507 | - '*.exe' 508 | selection_bar: 509 | Baz: 510 | - '*bar*' 511 | option_1: 512 | Bar|contains: 513 | - 'Asdf' 514 | option_2: 515 | Test: 516 | - 123 517 | ` 518 | 519 | var detection15_positive1 = ` 520 | { 521 | "Image": "C:\\test\\bits\\aaa-admin.exe", 522 | "ParentImage": "\\leadingBackslash\\something.exe", 523 | "Baz": "foo bar baz", 524 | "Bar": "lalala Asdf [124]" 525 | } 526 | ` 527 | 528 | var detection15_negative1 = ` 529 | { 530 | "Image": "C:\\test\\bits\\aaa-admin.exe", 531 | "ParentImage": "\\leadingBackslash\\something.exe", 532 | "Baz": "foo bar baz", 533 | "Bar": "lalala Asd [124]" 534 | } 535 | ` 536 | 537 | var detection15_negative2 = ` 538 | { 539 | "Image": "C:\\test\\bits\\aaa-admin.exe", 540 | "ParentImage": "\\leadingBackslash\\something.exe", 541 | "Baz": "foo baz", 542 | "Bar": "lalala Asdf [124]" 543 | } 544 | ` 545 | 546 | var detection15_positive2 = ` 547 | { 548 | "Image": "C:\\test\\bits\\aaa-admin.exe", 549 | "ParentImage": "\\leadingBackslash\\something.exe", 550 | "Baz": "foo bar baz", 551 | "Test": 123 552 | } 553 | ` 
554 | 555 | var detection15_negative3 = ` 556 | { 557 | "Image": "C:\\test\\bits\\aaa-admin.exe", 558 | "ParentImage": "\\leadingBackslash\\something.exe", 559 | "Baz": "foo bar baz", 560 | "Test": 124 561 | } 562 | ` 563 | 564 | var detection15_negative4 = ` 565 | { 566 | "Image": "C:\\test\\bits\\aaa-admin.exe", 567 | "ParentImage": "\\leadingBackslash\\something.exe", 568 | "Baz": "foo baz", 569 | "Test": 123 570 | } 571 | ` 572 | 573 | type parseTestCase struct { 574 | ID int 575 | Rule string 576 | Pos, Neg []string 577 | noCollapseWSNeg bool 578 | } 579 | 580 | var parseTestCases = []parseTestCase{ 581 | { 582 | ID: 1, 583 | Rule: detection1, 584 | Pos: []string{detection1_positive}, 585 | Neg: []string{detection1_negative1, detection1_negative2}, 586 | }, 587 | { 588 | ID: 2, 589 | Rule: detection2, 590 | Pos: []string{detection1_positive}, 591 | Neg: []string{detection1_negative1, detection1_negative2}, 592 | }, 593 | { 594 | ID: 3, 595 | Rule: detection3, 596 | Pos: []string{detection3_positive1, detection3_positive2}, 597 | Neg: []string{detection3_negative}, 598 | }, 599 | { 600 | ID: 4, 601 | Rule: detection4, 602 | Pos: []string{detection1_positive}, 603 | Neg: []string{detection1_negative1, detection1_negative2}, 604 | }, 605 | { 606 | ID: 5, 607 | Rule: detection5, 608 | Pos: []string{detection3_positive1, detection3_positive2}, 609 | Neg: []string{detection3_negative}, 610 | }, 611 | { 612 | ID: 6, 613 | Rule: detection6, 614 | Pos: []string{detection6_positive}, 615 | Neg: []string{detection6_negative}, 616 | }, 617 | { 618 | ID: 7, 619 | Rule: detection7, 620 | Pos: []string{detection3_positive1, detection3_positive2}, 621 | Neg: []string{detection7_negative1, detection7_negative2}, 622 | }, 623 | { 624 | ID: 8, 625 | Rule: detection8, 626 | Pos: []string{detection8_positive}, 627 | Neg: []string{detection8_negative1, detection8_negative2}, 628 | }, 629 | { 630 | ID: 9, 631 | Rule: detection9, 632 | Pos: []string{detection9_positive}, 633 | Neg: 
[]string{detection9_negative}, 634 | }, 635 | { 636 | ID: 10, 637 | Rule: detection10, 638 | Pos: []string{detection10_positive}, 639 | Neg: []string{detection10_negative}, 640 | }, 641 | { 642 | ID: 11, 643 | Rule: detection11, 644 | Pos: []string{detection11_positive}, 645 | Neg: []string{detection11_negative}, 646 | }, 647 | { 648 | ID: 12, 649 | Rule: detection12, 650 | Pos: []string{detection12_positive}, 651 | Neg: []string{detection12_negative}, 652 | }, 653 | { 654 | ID: 13, 655 | Rule: detection13, 656 | Pos: []string{detection13_positive, detection13_positive2, detection13_positive3, detection13_positive4}, 657 | Neg: []string{detection13_negative, detection13_negative2, detection13_negative3, detection13_negative4}, 658 | }, 659 | { 660 | ID: 14, 661 | Rule: detection14, 662 | Pos: []string{detection14_case}, 663 | noCollapseWSNeg: false, // ensures whitespace is collapsed and everything matches 664 | }, 665 | { 666 | ID: 14, 667 | Rule: detection14, 668 | Neg: []string{detection14_case}, 669 | noCollapseWSNeg: true, // turns off whitespace collapsing and causing a non-match 670 | }, 671 | { 672 | ID: 15, 673 | Rule: detection15, 674 | Pos: []string{detection15_positive1, detection15_positive2}, 675 | Neg: []string{detection15_negative1, detection15_negative2, detection15_negative3, detection15_negative4}, 676 | }, 677 | } 678 | 679 | func TestTokenCollect(t *testing.T) { 680 | for _, c := range LexPosCases { 681 | p := &parser{ 682 | lex: lex(c.Expr), 683 | } 684 | if err := p.collect(); err != nil { 685 | switch err.(type) { 686 | case ErrUnsupportedToken: 687 | default: 688 | t.Fatal(err) 689 | } 690 | } 691 | } 692 | } 693 | 694 | func TestParse(t *testing.T) { 695 | for _, c := range parseTestCases { 696 | var rule Rule 697 | if err := yaml.Unmarshal([]byte(c.Rule), &rule); err != nil { 698 | t.Fatalf("rule parse case %d failed to unmarshal yaml, %s", c.ID, err) 699 | } 700 | expr := rule.Detection["condition"].(string) 701 | p := &parser{ 702 | 
lex: lex(expr), 703 | sigma: rule.Detection, 704 | noCollapseWS: c.noCollapseWSNeg, 705 | } 706 | if err := p.collect(); err != nil { 707 | t.Fatalf("rule parser case %d failed to collect lexical tokens, %s", c.ID, err) 708 | } 709 | if err := p.parse(); err != nil { 710 | switch err.(type) { 711 | case ErrWip: 712 | t.Fatalf("WIP") 713 | default: 714 | t.Fatalf("rule parser case %d failed to parse lexical tokens, %s", c.ID, err) 715 | } 716 | } 717 | } 718 | } 719 | 720 | func TestSigmaEscape(t *testing.T) { 721 | tests := []struct { 722 | name string 723 | input string 724 | expected string 725 | validMatch string 726 | skip bool 727 | }{ 728 | { 729 | name: "No_Change", 730 | input: `\\leadingBackslash\\*.exe`, 731 | expected: `\\leadingBackslash\\*.exe`, 732 | validMatch: `\leadingBackslash\testing.exe`, 733 | }, 734 | { 735 | name: "Leading_Single_Backslash_Wildcard_After_Slash", 736 | input: `\leadingBackslash\\*.exe`, 737 | expected: `\\leadingBackslash\\*.exe`, 738 | validMatch: `\leadingBackslash\testing.exe`, 739 | }, 740 | { 741 | name: "Leading_Wildcard_Single_Backslash_Esc_Wildcard", 742 | input: `*\bits\*admin.exe`, 743 | expected: `*\\bits\*admin.exe`, 744 | validMatch: `leading\bits*admin.exe`, 745 | }, 746 | { 747 | name: "Double_Leading_Backslash_Single_Backslash_Wildcard", 748 | input: `\\\\DoubleBackslash\some*.exe`, 749 | expected: `\\\\DoubleBackslash\\some*.exe`, 750 | validMatch: `\\DoubleBackslash\sometMatch.exe`, 751 | }, 752 | { 753 | name: "Plaintext_Only_Esc_Wildcard", 754 | input: `some\full\\\*plaintext.exe`, 755 | expected: `some\\full\\\*plaintext.exe`, 756 | validMatch: `some\full\*plaintext.exe`, 757 | }, 758 | { 759 | name: "Double_Leading_Backslash_Complex_Mix_Esc", 760 | input: `\\\\DoubleBackslash\?\some*Other\\*test.\\???`, 761 | expected: `\\\\DoubleBackslash\?\\some*Other\\*test.\\???`, 762 | validMatch: `\\DoubleBackslash?\someMixOther\wildcardtest.\cmd`, 763 | }, 764 | { 765 | name: 
"Mixed_Wildcards_Single_Backslash_Brackets", 766 | input: `[*]\*\aSetof\\\sigma{rule?}here*`, 767 | expected: `\[*\]\*\\aSetof\\\\sigma\{rule?\}here*`, 768 | validMatch: `[testing]*\aSetof\\sigma{rules}hereWeGo`, 769 | }, 770 | } 771 | for _, curTest := range tests { 772 | t.Run(curTest.name, func(t *testing.T) { 773 | if curTest.skip { 774 | t.Skip("test marked as skip") 775 | } 776 | 777 | escStr := escapeSigmaForGlob(curTest.input) 778 | if escStr != curTest.expected { 779 | t.Errorf("failed to validate escaped input; got: %s - expected: %s", escStr, curTest.expected) 780 | } 781 | 782 | // test as a glob to be sure 783 | globT, err := glob.Compile(escStr) 784 | if err != nil { 785 | t.Fatalf("failed to compile glob: %+v", err) 786 | } 787 | if !globT.Match(curTest.validMatch) { 788 | t.Errorf("compiled glob did NOT match valid input; glob: %s -- data: %s", escStr, curTest.validMatch) 789 | } 790 | }) 791 | } 792 | } 793 | -------------------------------------------------------------------------------- /pattern.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | 8 | "github.com/gobwas/glob" 9 | ) 10 | 11 | type TextPatternModifier int 12 | 13 | const ( 14 | TextPatternNone TextPatternModifier = iota 15 | TextPatternContains 16 | TextPatternPrefix 17 | TextPatternSuffix 18 | TextPatternAll 19 | TextPatternRegex 20 | TextPatternKeyword 21 | ) 22 | 23 | // func isValidSpecifier(in string) bool { 24 | // return in == "contains" || 25 | // in == "endswith" || 26 | // in == "startswith" 27 | // } 28 | 29 | // NumMatcher is an atomic pattern for numeric item or list of items 30 | type NumMatcher interface { 31 | // NumMatch implements NumMatcher 32 | NumMatch(int) bool 33 | } 34 | 35 | // NumMatchers holds multiple numeric matchers 36 | type NumMatchers []NumMatcher 37 | 38 | // NumMatch implements NumMatcher 39 | func (n NumMatchers) NumMatch(val int) bool { 40 
// NumMatch implements NumMatcher.
// Contained matchers are joined with a logical disjunction: the first
// matcher that accepts val wins.
	for _, v := range n {
		if v.NumMatch(val) {
			return true
		}
	}
	return false
}

// NewNumMatcher builds a NumMatcher from one or more integer patterns.
// A single pattern is returned as a bare NumPattern; multiple patterns are
// wrapped into NumMatchers (OR semantics). Zero patterns is an error.
func NewNumMatcher(patterns ...int) (NumMatcher, error) {
	if len(patterns) == 0 {
		return nil, fmt.Errorf("no patterns defined for matcher object")
	}
	matcher := make(NumMatchers, 0)
	for _, p := range patterns {
		matcher = append(matcher, NumPattern{Val: p})
	}

	// Collapse the single-pattern case to the bare matcher to avoid the
	// extra slice indirection on every match.
	return func() NumMatcher {
		if len(matcher) == 1 {
			return matcher[0]
		}
		return matcher
	}(), nil
}

// StringMatcher is an atomic pattern that could implement glob, literal or regex matchers
type StringMatcher interface {
	// StringMatch implements StringMatcher
	StringMatch(string) bool
}

// gWSCollapse matches any run of whitespace; used to fold runs into a single space.
var gWSCollapse = regexp.MustCompile(`\s+`)

// handleWhitespace takes str and, if the global configuration for collapsing
// whitespace is NOT turned off, returns the string with whitespace collapsed
// (1+ spaces, tabs, etc... become a single space); otherwise just returns the
// unmodified str. This only applies to non-regex rules and data hitting
// non-regex rules. See Config.NoCollapseWS (we collapse by default).
func handleWhitespace(str string, noCollapseWS bool) string {
	if noCollapseWS { // do we collapse whitespace or not?
		return str
	}
	return gWSCollapse.ReplaceAllString(str, " ")
}

// Byte values with special meaning in Sigma patterns and/or gobwas/glob patterns.
const (
	sigmaSpecialWildcard     = byte('*')  // matches any run of characters
	sigmaSpecialSingle       = byte('?')  // matches exactly one character
	sigmaSpecialEscape       = byte('\\') // escape prefix
	globSpecialSqrBrktLeft   = byte('[')  // glob character class; plain text in Sigma
	globSpecialSqrBrktRight  = byte(']')
	globSpecialCurlBrktLeft  = byte('{') // glob alternation group; plain text in Sigma
	globSpecialCurlBrktRight = byte('}')
)
// escapeSigmaForGlob translates Sigma value escaping into gobwas/glob
// escaping. Mostly the string passes through untouched; the work is in
// rebalancing lone escaped backslashes (Sigma '\' must become glob '\\',
// '\\\' must become '\\\\', and so on).
//
// Only bare runs of backslashes need rebalancing: once a run is followed by a
// special character ('?' or '*'), the escapes are already valid by convention
// (per Sigma, '\\*' is an escaped backslash plus a wildcard, while '\\\*' is
// an escaped backslash plus an escaped wildcard).
//
// Sigma escaping rules per the specification:
//   - A plain backslash not followed by a wildcard may be written '\' or '\\'.
//   - A wildcard is escaped to be treated as a plain character: '\*'.
//   - A backslash before a wildcard is escaped to mean backslash-then-wildcard: '\\*'.
//   - Three backslashes escape both backslash and wildcard as plain values: '\\\*'.
//   - Three or four backslashes yield a plain double backslash; four are recommended: '\\\\'.
func escapeSigmaForGlob(str string) string {
	if str == "" { // quick out if empty
		return ""
	}

	// Square/curly brackets are meta-characters in the glob library but plain
	// text in Sigma, so they always receive an escape prefix ("quotemeta"-like).
	isBracket := func(b byte) bool {
		return b == '[' || b == ']' || b == '{' || b == '}'
	}

	n := len(str)
	out := make([]byte, 2*n)
	w := 2*n - 1 // write cursor; the output is assembled back-to-front

	// inWildcard is set when scanning past '?' or '*' and cleared by any
	// character other than a backslash or wildcard; backslashes seen while it
	// is set are already conventionally escaped and are not counted.
	inWildcard := false
	// runLen counts consecutive backslashes seen outside wildcard mode so an
	// odd-length run can be balanced with one extra escape.
	runLen := 0
	for i := n - 1; i >= 0; i-- {
		switch str[i] {
		case '*', '?':
			inWildcard = true
		case '\\':
			if !inWildcard {
				runLen++
			}
		default:
			inWildcard = false
		}

		// The current character ends a backslash run: rebalance an odd run by
		// emitting one compensating escape, then reset the counter.
		if str[i] != '\\' && runLen > 0 {
			if runLen%2 != 0 {
				out[w] = '\\'
				w-- // extra character was written
			}
			runLen = 0
		}

		out[w] = str[i] // copy the current character
		w--

		// Brackets always get an escape prefix for the glob library.
		if isBracket(str[i]) {
			out[w] = '\\'
			w-- // extra character was written
		}
	}

	// A final check catches a backslash run that reaches the start of the input.
	if runLen%2 != 0 {
		out[w] = '\\'
	} else {
		w++ // no compensating slash added; step back onto the first written character
	}

	return string(out[w:])
}
// NewStringMatcher builds a StringMatcher for the given patterns, honoring the
// Sigma text modifier mod. lower requests case-insensitive comparison for
// literal patterns, all joins the resulting matchers with conjunction instead
// of disjunction, and noCollapseWS disables whitespace collapsing (see
// handleWhitespace). An empty pattern list is an error.
func NewStringMatcher(
	mod TextPatternModifier,
	lower, all, noCollapseWS bool,
	patterns ...string,
) (StringMatcher, error) {
	if len(patterns) == 0 {
		return nil, fmt.Errorf("no patterns defined for matcher object")
	}
	matcher := make([]StringMatcher, 0)
	for _, p := range patterns {
		// process modifiers first
		switch mod {
		case TextPatternRegex: // regex per spec
			re, err := regexp.Compile(p)
			if err != nil {
				return nil, err
			}
			matcher = append(matcher, RegexPattern{Re: re})
		case TextPatternContains: // contains: puts * wildcards around the values, such that the value is matched anywhere in the field
			p = handleWhitespace(p, noCollapseWS)
			// ensure single backslashes etc. are escaped correctly before surrounding with wildcards
			p = escapeSigmaForGlob(p)
			p = "*" + p + "*"
			globNG, err := glob.Compile(p)
			if err != nil {
				return nil, err
			}
			matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
		case TextPatternSuffix:
			// endswith: literal suffix comparison (no glob translation here)
			p = handleWhitespace(p, noCollapseWS)
			matcher = append(matcher, SuffixPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
		case TextPatternPrefix:
			// startswith: literal prefix comparison (no glob translation here)
			p = handleWhitespace(p, noCollapseWS)
			matcher = append(matcher, PrefixPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
		default:
			// no (supported) modifiers; handle non-spec regex, globs and regular values
			if strings.HasPrefix(p, "/") && strings.HasSuffix(p, "/") {
				// a value wrapped in slashes is treated as a regular expression
				re, err := regexp.Compile(strings.TrimLeft(strings.TrimRight(p, "/"), "/"))
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, RegexPattern{Re: re})
			} else if mod == TextPatternKeyword {
				// a bit hacky: if the pattern is a keyword and did not appear to
				// be a regex, always process it as a 'contains'-style glob (may
				// appear anywhere), since keywords typically hit free-form
				// message strings with surrounding detail
				p = handleWhitespace(p, noCollapseWS)
				// ensure single backslashes etc. are escaped correctly before surrounding with wildcards
				p = escapeSigmaForGlob(p)
				p = "*" + p + "*"
				globNG, err := glob.Compile(p)
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
			} else if strings.Contains(p, "*") {
				p = handleWhitespace(p, noCollapseWS)
				// Do NOT call QuoteMeta here as we're assuming the author knows what they're doing...
				p = escapeSigmaForGlob(p)
				globNG, err := glob.Compile(p)
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
			} else {
				// plain value: exact literal comparison
				p = handleWhitespace(p, noCollapseWS)
				matcher = append(matcher, ContentPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
			}
		}
	}
	// single matcher is returned bare; multiple matchers are joined with
	// conjunction (all) or disjunction, then reordered for speed
	return func() StringMatcher {
		if len(matcher) == 1 {
			return matcher[0]
		}
		if all {
			return StringMatchersConj(matcher).Optimize()
		}
		return StringMatchers(matcher).Optimize()
	}(), nil
}

// StringMatchers holds multiple atomic matchers.
// Patterns are meant to be a list of possibilities, thus objects are joined
// with logical disjunctions.
type StringMatchers []StringMatcher

// StringMatch implements StringMatcher; the first contained matcher to accept
// msg wins.
func (s StringMatchers) StringMatch(msg string) bool {
	for _, m := range s {
		// I thought about a type assertion here for handling whitespace;
		// however, as we're dealing with non-pointer types, that may cause
		// some added overhead we can avoid by implementing it where needed
		if m.StringMatch(msg) {
			return true
		}
	}
	return false
}
| if m.StringMatch(msg) { 262 | return true 263 | } 264 | } 265 | return false 266 | } 267 | 268 | // Optimize creates a new StringMatchers slice ordered by matcher type 269 | // First match wins, thus we can optimize by making sure fast string patterns 270 | // are executed first, then globs, and finally slow regular expressions 271 | func (s StringMatchers) Optimize() StringMatchers { 272 | return optimizeStringMatchers(s) 273 | } 274 | 275 | // StringMatchersConj is similar to StringMatcher but elements are joined with 276 | // conjunction, i.e. all patterns must match 277 | // used to implement "all" specifier for selection types 278 | type StringMatchersConj []StringMatcher 279 | 280 | // StringMatch implements StringMatcher 281 | func (s StringMatchersConj) StringMatch(msg string) bool { 282 | for _, m := range s { 283 | if !m.StringMatch(msg) { 284 | return false 285 | } 286 | } 287 | return true 288 | } 289 | 290 | // Optimize creates a new StringMatchers slice ordered by matcher type 291 | // First match wins, thus we can optimize by making sure fast string patterns 292 | // are executed first, then globs, and finally slow regular expressions 293 | func (s StringMatchersConj) Optimize() StringMatchersConj { 294 | return optimizeStringMatchers(s) 295 | } 296 | 297 | func optimizeStringMatchers(s []StringMatcher) []StringMatcher { 298 | globs := make([]StringMatcher, 0) 299 | re := make([]StringMatcher, 0) 300 | literals := make([]StringMatcher, 0) 301 | for _, pat := range s { 302 | switch pat.(type) { 303 | case ContentPattern, PrefixPattern, SuffixPattern: 304 | literals = append(literals, pat) 305 | case GlobPattern: 306 | globs = append(globs, pat) 307 | case RegexPattern: 308 | re = append(re, pat) 309 | } 310 | } 311 | return append(literals, append(globs, re...)...) 
312 | } 313 | 314 | // ContentPattern is a token for literal content matching 315 | type ContentPattern struct { 316 | Token string 317 | Lowercase bool 318 | NoCollapseWS bool 319 | } 320 | 321 | // StringMatch implements StringMatcher 322 | func (c ContentPattern) StringMatch(msg string) bool { 323 | msg = handleWhitespace(msg, c.NoCollapseWS) 324 | return lowerCaseIfNeeded(msg, c.Lowercase) == lowerCaseIfNeeded(c.Token, c.Lowercase) 325 | } 326 | 327 | // PrefixPattern is a token for literal content matching 328 | type PrefixPattern struct { 329 | Token string 330 | Lowercase bool 331 | NoCollapseWS bool 332 | } 333 | 334 | // StringMatch implements StringMatcher 335 | func (c PrefixPattern) StringMatch(msg string) bool { 336 | msg = handleWhitespace(msg, c.NoCollapseWS) 337 | return strings.HasPrefix( 338 | lowerCaseIfNeeded(msg, c.Lowercase), 339 | lowerCaseIfNeeded(c.Token, c.Lowercase), 340 | ) 341 | } 342 | 343 | // SuffixPattern is a token for literal content matching 344 | type SuffixPattern struct { 345 | Token string 346 | Lowercase bool 347 | NoCollapseWS bool 348 | } 349 | 350 | // StringMatch implements StringMatcher 351 | func (c SuffixPattern) StringMatch(msg string) bool { 352 | msg = handleWhitespace(msg, c.NoCollapseWS) 353 | return strings.HasSuffix( 354 | lowerCaseIfNeeded(msg, c.Lowercase), 355 | lowerCaseIfNeeded(c.Token, c.Lowercase), 356 | ) 357 | } 358 | 359 | // RegexPattern is for matching messages with regular expresions 360 | type RegexPattern struct { 361 | Re *regexp.Regexp 362 | } 363 | 364 | // StringMatch implements StringMatcher 365 | func (r RegexPattern) StringMatch(msg string) bool { 366 | return r.Re.MatchString(msg) 367 | } 368 | 369 | // GlobPattern is similar to ContentPattern but allows for asterisk wildcards 370 | type GlobPattern struct { 371 | Glob *glob.Glob 372 | NoCollapseWS bool 373 | } 374 | 375 | // StringMatch implements StringMatcher 376 | func (g GlobPattern) StringMatch(msg string) bool { 377 | msg = 
// SimplePattern is a reference type to illustrate StringMatcher.
type SimplePattern struct {
	Token        string
	NoCollapseWS bool
}

// StringMatch implements StringMatcher via substring containment.
func (s SimplePattern) StringMatch(msg string) bool {
	msg = handleWhitespace(msg, s.NoCollapseWS)
	return strings.Contains(msg, s.Token)
}

// lowerCaseIfNeeded lower-cases str when lower is true, otherwise returns it
// unchanged.
func lowerCaseIfNeeded(str string, lower bool) string {
	if lower {
		return strings.ToLower(str)
	}
	return str
}

// NumPattern matches on numeric value.
type NumPattern struct {
	Val int
}

// NumMatch implements NumMatcher via exact equality.
func (n NumPattern) NumMatch(val int) bool {
	return n.Val == val
}

// --- rule.go ---

package sigma

import (
	"bytes"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"gopkg.in/yaml.v2"
)

// RuleHandle is a meta object containing all fields from raw yaml, but is
// enhanced to also hold debugging info from the tool, such as source file
// path, etc.
type RuleHandle struct {
	Rule

	Path         string `json:"path"`
	Multipart    bool   `json:"multipart"`
	NoCollapseWS bool   `json:"noCollapseWS"`
}

// Rule defines a raw rule conforming to the sigma rule specification
// https://github.com/Neo23x0/sigma/wiki/Specification
// Only meant to be used for parsing yaml that matches the Sigma rule definition.
type Rule struct {
	Author         string   `yaml:"author" json:"author"`
	Description    string   `yaml:"description" json:"description"`
	Falsepositives []string `yaml:"falsepositives" json:"falsepositives"`
	Fields         []string `yaml:"fields" json:"fields"`
	ID             string   `yaml:"id" json:"id"`
	Level          string   `yaml:"level" json:"level"`
	Title          string   `yaml:"title" json:"title"`
	Status         string   `yaml:"status" json:"status"`
	References     []string `yaml:"references" json:"references"`

	Logsource `yaml:"logsource" json:"logsource"`
	Detection `yaml:"detection" json:"detection"`
	Tags      `yaml:"tags" json:"tags"`
}
// HasTags returns true if the rule contains all provided tags, otherwise false.
func (r *Rule) HasTags(tags []string) bool {
	lookup := make(map[string]bool, len(r.Tags))
	for _, tag := range r.Tags {
		lookup[tag] = true
	}
	for _, tag := range tags {
		if _, ok := lookup[tag]; !ok {
			return false
		}
	}
	return true
}

// RuleFromYAML parses yaml data into a Rule object.
func RuleFromYAML(data []byte) (r Rule, err error) {
	err = yaml.Unmarshal(data, &r)
	return
}

// IsMultipart checks if a raw rule document is multipart, i.e. contains a
// "---" document separator somewhere other than the very beginning.
func IsMultipart(data []byte) bool {
	return !bytes.HasPrefix(data, []byte("---")) && bytes.Contains(data, []byte("---"))
}

// NewRuleList reads a list of sigma rule paths and parses them into rule
// objects. When skip is true, yaml parse failures are collected and reported
// in bulk instead of aborting; rules not carrying all of tags are dropped.
// noCollapseWS is forwarded to each RuleHandle.
func NewRuleList(files []string, skip, noCollapseWS bool, tags []string) ([]RuleHandle, error) {
	if len(files) == 0 {
		return nil, fmt.Errorf("missing rule file list")
	}
	errs := make([]ErrParseYaml, 0)
	rules := make([]RuleHandle, 0)
loop:
	for i, path := range files {
		data, err := os.ReadFile(path)
		if err != nil {
			return nil, err
		}
		r, err := RuleFromYAML(data)
		if err != nil {
			if skip {
				errs = append(errs, ErrParseYaml{
					Path:  path,
					Count: i,
					Err:   err,
				})
				continue loop
			}
			// NOTE(review): the skip branch collects ErrParseYaml by value
			// while this branch returns a pointer — confirm callers' type
			// switches handle both forms.
			return nil, &ErrParseYaml{Err: err, Path: path}
		}

		// drop rules that do not carry every requested tag
		if !r.HasTags(tags) {
			continue loop
		}

		rules = append(rules, RuleHandle{
			Path:         path,
			Rule:         r,
			NoCollapseWS: noCollapseWS,
			Multipart:    IsMultipart(data),
		})
	}
	// report collected parse failures in bulk, if any
	return rules, func() error {
		if len(errs) > 0 {
			return ErrBulkParseYaml{Errs: errs}
		}
		return nil
	}()
}
// Logsource represents the logsource field in a sigma rule.
// It defines relevant event streams and is used for pre-filtering.
type Logsource struct {
	Product    string `yaml:"product" json:"product"`
	Category   string `yaml:"category" json:"category"`
	Service    string `yaml:"service" json:"service"`
	Definition string `yaml:"definition" json:"definition"`
}

// Detection represents the detection field in a sigma rule.
// It contains the condition expression and identifier fields for building the AST.
type Detection map[string]interface{}

// Extract returns a copy of the detection map without the "condition" key,
// i.e. only the identifier definitions.
func (d Detection) Extract() map[string]interface{} {
	tx := make(map[string]interface{})
	for k, v := range d {
		if k != "condition" {
			tx[k] = v
		}
	}
	return tx
}

// Tags contains a metadata list for tying positive matches together with
// other threat intel sources, for example for attaching MITRE ATT&CK tactics
// or techniques to the event.
type Tags []string

// Result is an object returned on positive sigma match.
type Result struct {
	Tags `json:"tags"`

	ID          string `json:"id"`
	Title       string `json:"title"`
	Description string `json:"description"`
}

// Results should be returned when a single event matches multiple rules.
type Results []Result
:= range dirs { 161 | if err := filepath.Walk(dir, func( 162 | path string, 163 | info os.FileInfo, 164 | err error, 165 | ) error { 166 | if !info.IsDir() && strings.HasSuffix(path, "yml") { 167 | out = append(out, path) 168 | } 169 | return err 170 | }); err != nil { 171 | return out, err 172 | } 173 | } 174 | return out, nil 175 | } 176 | -------------------------------------------------------------------------------- /ruleset.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sync" 7 | ) 8 | 9 | // Config is used as argument to creating a new ruleset 10 | type Config struct { 11 | // root directory for recursive rule search 12 | // rules must be readable files with "yml" suffix 13 | Directory []string 14 | // by default, a rule parse fail will simply increment Ruleset.Failed counter when failing to 15 | // parse yaml or rule AST 16 | // this parameter will cause an early error return instead 17 | FailOnRuleParse, FailOnYamlParse bool 18 | // by default, we will collapse whitespace for both rules and data of non-regex rules and non-regex compared data 19 | // setthig this to true turns that behavior off 20 | NoCollapseWS bool 21 | } 22 | 23 | func (c Config) validate() error { 24 | if c.Directory == nil || len(c.Directory) == 0 { 25 | return fmt.Errorf("missing root directory for sigma rules") 26 | } 27 | for _, dir := range c.Directory { 28 | info, err := os.Stat(dir) 29 | if os.IsNotExist(err) { 30 | return fmt.Errorf("%s does not exist", dir) 31 | } 32 | if !info.IsDir() { 33 | return fmt.Errorf("%s is not a directory", dir) 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | // Ruleset is a collection of rules 40 | type Ruleset struct { 41 | mu *sync.RWMutex 42 | 43 | Rules []*Tree 44 | root []string 45 | 46 | Total, Ok, Failed, Unsupported int 47 | } 48 | 49 | // NewRuleset instanciates a Ruleset object 50 | func NewRuleset(c Config, tags []string) (*Ruleset, error) { 51 
// Ruleset is a collection of parsed rule trees along with counters describing
// the outcome of loading them.
type Ruleset struct {
	mu *sync.RWMutex

	Rules []*Tree
	root  []string // configured root directories the rules were loaded from

	Total, Ok, Failed, Unsupported int
}

// NewRuleset instantiates a Ruleset object from the given Config, keeping
// only rules that carry all of tags. Yaml parse failures are tolerated
// (counted in Failed) unless Config.FailOnYamlParse is set.
func NewRuleset(c Config, tags []string) (*Ruleset, error) {
	if err := c.validate(); err != nil {
		return nil, err
	}
	files, err := NewRuleFileList(c.Directory)
	if err != nil {
		return nil, err
	}
	var fail int
	rules, err := NewRuleList(files, !c.FailOnYamlParse, c.NoCollapseWS, tags)
	if err != nil {
		switch e := err.(type) {
		case ErrBulkParseYaml:
			// tolerated bulk yaml failures; fold them into the counters
			fail += len(e.Errs)
		default:
			return nil, err
		}
	}
	result := RulesetFromRuleList(rules)
	result.root = c.Directory
	result.Failed += fail
	result.Total += fail
	return result, nil
}

// RulesetFromRuleList builds ASTs for each parsed rule, counting failures and
// unsupported rules (multipart rules and rules with unsupported tokens).
func RulesetFromRuleList(rules []RuleHandle) *Ruleset {
	var fail, unsupp int
	set := make([]*Tree, 0)
loop:
	for _, raw := range rules {
		// multipart rules are not supported by the tree builder
		if raw.Multipart {
			unsupp++
			continue loop
		}
		tree, err := NewTree(raw)
		if err != nil {
			// both value and pointer forms are matched because unsupported-token
			// errors may be produced either way
			switch err.(type) {
			case ErrUnsupportedToken, *ErrUnsupportedToken:
				unsupp++
			default:
				fail++
			}
			continue loop
		}
		set = append(set, tree)
	}
	return &Ruleset{
		mu:          &sync.RWMutex{},
		Rules:       set,
		Failed:      fail,
		Ok:          len(set),
		Unsupported: unsupp,
		Total:       len(rules),
	}
}

// EvalAll evaluates the event against every rule in the set and returns all
// positive results; the second return value reports whether anything matched.
func (r *Ruleset) EvalAll(e Event) (Results, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()
	results := make(Results, 0)
	for _, rule := range r.Rules {
		if res, match := rule.Eval(e); match {
			results = append(results, *res)
		}
	}
	if len(results) > 0 {
		return results, true
	}
	return nil, false
}
// Keyworder implements the keywords sigma rule type on an arbitrary event.
// Should return a list of fields that are relevant for rule matching.
type Keyworder interface {
	// Keywords implements Keyworder
	Keywords() ([]string, bool)
}

// Selector implements the selection sigma rule type.
type Selector interface {
	// Select implements Selector
	Select(string) (interface{}, bool)
}

// Event implements sigma rule types by embedding Keyworder and Selector.
// Used by rules to extract relevant fields.
type Event interface {
	Keyworder
	Selector
}

// Matcher is used for implementing the Abstract Syntax Tree for the Sigma engine.
type Matcher interface {
	// Match implements Matcher.
	// NOTE(review): the semantics of the second boolean are not visible in
	// this chunk — presumably "applicable/keywords present"; confirm against
	// the tree implementation.
	Match(Event) (bool, bool)
}

// Branch implements Matcher with additional methods for walking and debugging the tree.
type Branch interface {
	Matcher

	// Self returns Node or final rule object for debugging and/or walking the tree
	// Must be type switched externally
	// Self() interface{}
}

// --- token.go ---

package sigma

import (
	"context"

	"github.com/gobwas/glob"
)

// eof marks the end of lexer input.
var eof = rune(0)

// Item is a lexical token along with its respective plaintext value.
// Item is communicated between lexer and parser.
type Item struct {
	T            Token
	Val          string
	globVal      *glob.Glob // Do NOT access directly, use the Item.Glob() function instead
	globCompFail bool       // prevents us from trying to re-compile a failed globVal over and over...
}

// String returns the plaintext value of the token.
func (i Item) String() string { return i.Val }
23 | // Do NOT access globVal directly as it won't be compiled until the first call to Item.Glob() 24 | func (i *Item) Glob() *glob.Glob { 25 | if i.globVal == nil && !i.globCompFail { 26 | newVal := escapeSigmaForGlob(i.Val) 27 | newGlob, err := glob.Compile(newVal) 28 | if err != nil { 29 | i.globCompFail = true 30 | return nil 31 | } 32 | i.globVal = &newGlob 33 | } 34 | 35 | return i.globVal 36 | } 37 | 38 | func genItems(t []Item) <-chan Item { 39 | tx := make(chan Item) // unbuffered 40 | go func(ctx context.Context) { 41 | defer close(tx) 42 | for _, item := range t { 43 | tx <- item 44 | } 45 | }(context.TODO()) 46 | return tx 47 | } 48 | 49 | // Token is a lexical token extracted from condition field 50 | type Token int 51 | 52 | const ( 53 | TokBegin Token = iota 54 | 55 | // Helpers for internal stuff 56 | TokErr 57 | TokUnsupp 58 | TokNil 59 | 60 | // user-defined word 61 | TokIdentifier 62 | TokIdentifierWithWildcard 63 | TokIdentifierAll 64 | 65 | // Literals 66 | TokLitEof 67 | 68 | // Separators 69 | TokSepLpar 70 | TokSepRpar 71 | TokSepPipe 72 | 73 | // Operators 74 | TokOpEq 75 | TokOpGt 76 | TokOpGte 77 | TokOpLt 78 | TokOpLte 79 | 80 | // Keywords 81 | TokKeywordAnd 82 | TokKeywordOr 83 | TokKeywordNot 84 | TokKeywordAgg 85 | 86 | // TODO 87 | TokKeywordNear 88 | TokKeywordBy 89 | 90 | // Statements 91 | TokStOne 92 | TokStAll 93 | ) 94 | 95 | // String documents human readable textual value of token 96 | // For visual debugging, so symbols will be written out and everything is uppercased 97 | func (t Token) String() string { 98 | switch t { 99 | case TokIdentifier: 100 | return "IDENT" 101 | case TokIdentifierWithWildcard: 102 | return "WILDCARDIDENT" 103 | case TokIdentifierAll: 104 | return "THEM" 105 | case TokSepLpar: 106 | return "LPAR" 107 | case TokSepRpar: 108 | return "RPAR" 109 | case TokSepPipe: 110 | return "PIPE" 111 | case TokOpEq: 112 | return "EQ" 113 | case TokOpGt: 114 | return "GT" 115 | case TokOpGte: 116 | return "GTE" 117 
| case TokOpLt: 118 | return "LT" 119 | case TokOpLte: 120 | return "LTE" 121 | case TokKeywordAnd: 122 | return "AND" 123 | case TokKeywordOr: 124 | return "OR" 125 | case TokKeywordNot: 126 | return "NOT" 127 | case TokStAll: 128 | return "ALL" 129 | case TokStOne: 130 | return "ONE" 131 | case TokKeywordAgg: 132 | return "AGG" 133 | case TokLitEof: 134 | return "EOF" 135 | case TokErr: 136 | return "ERR" 137 | case TokUnsupp: 138 | return "UNSUPPORTED" 139 | case TokBegin: 140 | return "BEGINNING" 141 | case TokNil: 142 | return "NIL" 143 | default: 144 | return "Unk" 145 | } 146 | } 147 | 148 | // Literal documents plaintext values of a token 149 | // Uses special symbols and expressions, as used in a rule 150 | func (t Token) Literal() string { 151 | switch t { 152 | case TokIdentifier, TokIdentifierWithWildcard: 153 | return "keywords" 154 | case TokIdentifierAll: 155 | return "them" 156 | case TokSepLpar: 157 | return "(" 158 | case TokSepRpar: 159 | return ")" 160 | case TokSepPipe: 161 | return "|" 162 | case TokOpEq: 163 | return "=" 164 | case TokOpGt: 165 | return ">" 166 | case TokOpGte: 167 | return ">=" 168 | case TokOpLt: 169 | return "<" 170 | case TokOpLte: 171 | return "<=" 172 | case TokKeywordAnd: 173 | return "and" 174 | case TokKeywordOr: 175 | return "or" 176 | case TokKeywordNot: 177 | return "not" 178 | case TokStAll: 179 | return "all of" 180 | case TokStOne: 181 | return "1 of" 182 | case TokLitEof, TokNil: 183 | return "" 184 | default: 185 | return "Err" 186 | } 187 | } 188 | 189 | // Rune returns UTF-8 numeric value of symbol 190 | func (t Token) Rune() rune { 191 | switch t { 192 | case TokSepLpar: 193 | return '(' 194 | case TokSepRpar: 195 | return ')' 196 | case TokSepPipe: 197 | return '|' 198 | default: 199 | return eof 200 | } 201 | } 202 | 203 | // validTokenSequence detects invalid token sequences 204 | // not meant to be a perfect validator, simply a quick check before parsing 205 | func validTokenSequence(t1, t2 Token) bool 
{ 206 | switch t2 { 207 | case TokStAll, TokStOne: 208 | switch t1 { 209 | case TokBegin, TokSepLpar, TokKeywordAnd, TokKeywordOr, TokKeywordNot: 210 | return true 211 | } 212 | case TokIdentifierAll: 213 | switch t1 { 214 | case TokStAll, TokStOne: 215 | return true 216 | } 217 | case TokIdentifier, TokIdentifierWithWildcard: 218 | switch t1 { 219 | case TokSepLpar, TokBegin, TokKeywordAnd, TokKeywordOr, TokKeywordNot, TokStOne, TokStAll: 220 | return true 221 | } 222 | case TokKeywordAnd, TokKeywordOr: 223 | switch t1 { 224 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 225 | return true 226 | } 227 | case TokKeywordNot: 228 | switch t1 { 229 | case TokKeywordAnd, TokKeywordOr, TokSepLpar, TokBegin: 230 | return true 231 | } 232 | case TokSepLpar: 233 | switch t1 { 234 | case TokKeywordAnd, TokKeywordOr, TokKeywordNot, TokBegin, TokSepLpar: 235 | return true 236 | } 237 | case TokSepRpar: 238 | switch t1 { 239 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepLpar, TokSepRpar: 240 | return true 241 | } 242 | case TokLitEof: 243 | switch t1 { 244 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 245 | return true 246 | } 247 | case TokSepPipe: 248 | switch t1 { 249 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 250 | return true 251 | } 252 | } 253 | return false 254 | } 255 | -------------------------------------------------------------------------------- /tree.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gobwas/glob" 7 | ) 8 | 9 | // Tree represents the full AST for a sigma rule 10 | type Tree struct { 11 | Root Branch 12 | Rule *RuleHandle 13 | } 14 | 15 | // Match implements Matcher 16 | func (t Tree) Match(e Event) (bool, bool) { 17 | return t.Root.Match(e) 18 | } 19 | 20 | func (t Tree) Eval(e Event) (*Result, bool) { 21 | match, applicable 
:= t.Match(e) 22 | if !applicable { 23 | return nil, false 24 | } 25 | if t.Rule == nil && match { 26 | return &Result{}, true 27 | } 28 | if match { 29 | return &Result{ 30 | ID: t.Rule.ID, 31 | Title: t.Rule.Title, 32 | Tags: t.Rule.Tags, 33 | Description: t.Rule.Description, 34 | }, true 35 | } 36 | return nil, false 37 | } 38 | 39 | // NewTree parses rule handle into an abstract syntax tree 40 | func NewTree(r RuleHandle) (*Tree, error) { 41 | if r.Detection == nil { 42 | return nil, ErrMissingDetection{} 43 | } 44 | expr, ok := r.Detection["condition"].(string) 45 | if !ok { 46 | return nil, ErrMissingCondition{} 47 | } 48 | 49 | p := &parser{ 50 | lex: lex(expr), 51 | condition: expr, 52 | sigma: r.Detection, 53 | noCollapseWS: r.NoCollapseWS, 54 | } 55 | if err := p.run(); err != nil { 56 | return nil, err 57 | } 58 | t := &Tree{ 59 | Root: p.result, 60 | Rule: &r, 61 | } 62 | return t, nil 63 | } 64 | 65 | // newBranch builds a binary tree from token list 66 | // sequence and group validation should be done before invoking newBranch 67 | func newBranch(d Detection, t []Item, depth int, noCollapseWS bool) (Branch, error) { 68 | rx := genItems(t) 69 | 70 | and := make(NodeSimpleAnd, 0) 71 | or := make(NodeSimpleOr, 0) 72 | var negated bool 73 | var wildcard Token 74 | 75 | for item := range rx { 76 | switch item.T { 77 | case TokIdentifier: 78 | val, ok := d[item.Val] 79 | if !ok { 80 | return nil, ErrMissingConditionItem{Key: item.Val} 81 | } 82 | b, err := newRuleFromIdent(val, checkIdentType(item.Val, val), noCollapseWS) 83 | if err != nil { 84 | return nil, err 85 | } 86 | and = append(and, newNodeNotIfNegated(b, negated)) 87 | negated = false 88 | case TokKeywordAnd: 89 | // no need to do anything special here 90 | case TokKeywordOr: 91 | // fill OR gate with collected AND nodes 92 | // reduce will strip AND logic if only one token has been collected 93 | or = append(or, and.Reduce()) 94 | // reset existing AND collector 95 | and = make(NodeSimpleAnd, 0) 
96 | case TokKeywordNot: 97 | negated = true 98 | case TokSepLpar: 99 | // recursively create new branch and append to existing list 100 | // then skip to next token after grouping 101 | b, err := newBranch(d, extractGroup(rx), depth+1, noCollapseWS) 102 | if err != nil { 103 | return nil, err 104 | } 105 | and = append(and, newNodeNotIfNegated(b, negated)) 106 | negated = false 107 | case TokIdentifierAll: 108 | switch wildcard { 109 | case TokStAll: 110 | rules, err := extractAllToRules(d, noCollapseWS) 111 | if err != nil { 112 | return nil, err 113 | } 114 | and = append(and, newNodeNotIfNegated(NodeSimpleAnd(rules), negated)) 115 | negated = false 116 | case TokStOne: 117 | rules, err := extractAllToRules(d, noCollapseWS) 118 | if err != nil { 119 | return nil, err 120 | } 121 | and = append(and, newNodeNotIfNegated(NodeSimpleOr(rules), negated)) 122 | negated = false 123 | default: 124 | return nil, fmt.Errorf("invalid wildcard ident, missing 1 of/ all of prefix") 125 | } 126 | case TokIdentifierWithWildcard: 127 | switch wildcard { 128 | case TokStAll: 129 | // build logical conjunction 130 | rules, err := extractAndBuildBranches(d, item.Glob(), noCollapseWS) 131 | if err != nil { 132 | return nil, fmt.Errorf("failed to extract and build branch for '%s': %s", item, err) 133 | } 134 | and = append(and, newNodeNotIfNegated(NodeSimpleAnd(rules), negated)) 135 | negated = false 136 | case TokStOne: 137 | // build logical disjunction 138 | rules, err := extractAndBuildBranches(d, item.Glob(), noCollapseWS) 139 | if err != nil { 140 | return nil, fmt.Errorf("failed to extract and build branch for '%s': %s", item, err) 141 | } 142 | and = append(and, newNodeNotIfNegated(NodeSimpleOr(rules), negated)) 143 | negated = false 144 | default: 145 | // invalid case, did not see 1of/allof statement before wildcard ident 146 | return nil, fmt.Errorf("invalid wildcard ident, missing 1 of/ all of prefix") 147 | } 148 | wildcard = TokBegin 149 | case TokStAll: 150 | wildcard = 
TokStAll 151 | case TokStOne: 152 | wildcard = TokStOne 153 | case TokSepRpar: 154 | return nil, fmt.Errorf("parser error, should not see %s", 155 | TokSepRpar) 156 | default: 157 | return nil, ErrUnsupportedToken{ 158 | Msg: fmt.Sprintf("%s | %s", item.T, item.T.Literal()), 159 | } 160 | } 161 | } 162 | or = append(or, newNodeNotIfNegated(and.Reduce(), negated)) 163 | 164 | return or.Reduce(), nil 165 | } 166 | 167 | func extractGroup(rx <-chan Item) []Item { 168 | // fn is called when newBranch hits TokSepLpar 169 | // it will be consumed, so balance is already 1 170 | balance := 1 171 | group := make([]Item, 0) 172 | for item := range rx { 173 | if balance > 0 { 174 | group = append(group, item) 175 | } 176 | switch item.T { 177 | case TokSepLpar: 178 | balance++ 179 | case TokSepRpar: 180 | balance-- 181 | if balance == 0 { 182 | return group[:len(group)-1] 183 | } 184 | default: 185 | } 186 | } 187 | return group 188 | } 189 | 190 | func extractAndBuildBranches(d Detection, g *glob.Glob, noCollapseWS bool) ([]Branch, error) { 191 | vals, err := extractWildcardIdents(d, g) 192 | if err != nil { 193 | return nil, err 194 | } 195 | rules := make(NodeSimpleAnd, len(vals)) 196 | for i, v := range vals { 197 | b, err := newRuleFromIdent(v, identSelection, noCollapseWS) 198 | if err != nil { 199 | return nil, err 200 | } 201 | rules[i] = b 202 | } 203 | return rules, nil 204 | } 205 | 206 | func extractWildcardIdents(d Detection, g *glob.Glob) ([]interface{}, error) { 207 | if g == nil { 208 | return nil, fmt.Errorf("passed glob was nil (failed to compile)") 209 | } 210 | rules := make([]interface{}, 0) 211 | for k, v := range d { 212 | if (*g).Match(k) { 213 | rules = append(rules, v) 214 | } 215 | } 216 | if len(rules) == 0 { 217 | return nil, fmt.Errorf("ident did not match any values") 218 | } 219 | return rules, nil 220 | } 221 | 222 | func extractAllToRules(d Detection, noCollapseWS bool) ([]Branch, error) { 223 | rules := make([]Branch, 0) 224 | for k, v := 
range d.Extract() { 225 | b, err := newRuleFromIdent(v, checkIdentType(k, v), noCollapseWS) 226 | if err != nil { 227 | return nil, err 228 | } 229 | rules = append(rules, b) 230 | } 231 | return rules, nil 232 | } 233 | -------------------------------------------------------------------------------- /tree_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | "github.com/markuskont/datamodels" 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | func TestTreeParse(t *testing.T) { 12 | for _, c := range parseTestCases { 13 | var rule Rule 14 | if err := yaml.Unmarshal([]byte(c.Rule), &rule); err != nil { 15 | t.Fatalf("tree parse case %d failed to unmarshal yaml, %s", c.ID, err) 16 | } 17 | p, err := NewTree(RuleHandle{Rule: rule, NoCollapseWS: c.noCollapseWSNeg}) 18 | if err != nil { 19 | t.Fatalf("tree parse case %d failed: %s", c.ID, err) 20 | } 21 | // Positive cases 22 | for i, c2 := range c.Pos { 23 | var obj datamodels.Map 24 | if err := json.Unmarshal([]byte(c2), &obj); err != nil { 25 | t.Fatalf("rule parser case %d positive case %d json unmarshal error %s", c.ID, i, err) 26 | } 27 | m, _ := p.Match(obj) 28 | if !m { 29 | t.Fatalf("rule parser case %d positive case %d did not match", c.ID, i) 30 | } 31 | } 32 | // Negative cases 33 | for i, c2 := range c.Neg { 34 | var obj datamodels.Map 35 | if err := json.Unmarshal([]byte(c2), &obj); err != nil { 36 | t.Fatalf("rule parser case %d positive case %d json unmarshal error %s", c.ID, i, err) 37 | } 38 | m, _ := p.Match(obj) 39 | if m { 40 | t.Fatalf("rule parser case %d negative case %d matched", c.ID, i) 41 | } 42 | } 43 | } 44 | } 45 | 46 | // we should probably add an alternative to this benchmark to include noCollapseWS on or off (we collapse by default now) 47 | func benchmarkCase(b *testing.B, rawRule, rawEvent string) { 48 | var rule Rule 49 | if err := yaml.Unmarshal([]byte(parseTestCases[0].Rule), 
&rule); err != nil { 50 | b.Fail() 51 | } 52 | p, err := NewTree(RuleHandle{Rule: rule}) 53 | if err != nil { 54 | b.Fail() 55 | } 56 | var event datamodels.Map 57 | if err := json.Unmarshal([]byte(rawEvent), &event); err != nil { 58 | b.Fail() 59 | } 60 | for i := 0; i < b.N; i++ { 61 | p.Match(event) 62 | } 63 | } 64 | 65 | func BenchmarkTreePositive0(b *testing.B) { 66 | benchmarkCase(b, parseTestCases[0].Rule, parseTestCases[0].Pos[0]) 67 | } 68 | 69 | func BenchmarkTreePositive1(b *testing.B) { 70 | benchmarkCase(b, parseTestCases[1].Rule, parseTestCases[1].Pos[0]) 71 | } 72 | 73 | func BenchmarkTreePositive2(b *testing.B) { 74 | benchmarkCase(b, parseTestCases[2].Rule, parseTestCases[2].Pos[0]) 75 | } 76 | 77 | func BenchmarkTreePositive3(b *testing.B) { 78 | benchmarkCase(b, parseTestCases[3].Rule, parseTestCases[3].Pos[0]) 79 | } 80 | 81 | func BenchmarkTreePositive4(b *testing.B) { 82 | benchmarkCase(b, parseTestCases[4].Rule, parseTestCases[4].Pos[0]) 83 | } 84 | 85 | func BenchmarkTreePositive5(b *testing.B) { 86 | benchmarkCase(b, parseTestCases[5].Rule, parseTestCases[5].Pos[0]) 87 | } 88 | 89 | func BenchmarkTreePositive6(b *testing.B) { 90 | benchmarkCase(b, parseTestCases[6].Rule, parseTestCases[6].Pos[0]) 91 | } 92 | 93 | func BenchmarkTreeNegative0(b *testing.B) { 94 | benchmarkCase(b, parseTestCases[0].Rule, parseTestCases[0].Neg[0]) 95 | } 96 | 97 | func BenchmarkTreeNegative1(b *testing.B) { 98 | benchmarkCase(b, parseTestCases[1].Rule, parseTestCases[1].Neg[0]) 99 | } 100 | 101 | func BenchmarkTreeNegative2(b *testing.B) { 102 | benchmarkCase(b, parseTestCases[2].Rule, parseTestCases[2].Neg[0]) 103 | } 104 | 105 | func BenchmarkTreeNegative3(b *testing.B) { 106 | benchmarkCase(b, parseTestCases[3].Rule, parseTestCases[3].Neg[0]) 107 | } 108 | 109 | func BenchmarkTreeNegative4(b *testing.B) { 110 | benchmarkCase(b, parseTestCases[4].Rule, parseTestCases[4].Neg[0]) 111 | } 112 | 113 | func BenchmarkTreeNegative5(b *testing.B) {
114 | benchmarkCase(b, parseTestCases[5].Rule, parseTestCases[5].Neg[0]) 115 | } 116 | 117 | func BenchmarkTreeNegative6(b *testing.B) { 118 | benchmarkCase(b, parseTestCases[6].Rule, parseTestCases[6].Neg[0]) 119 | } 120 | --------------------------------------------------------------------------------