├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── errors.go ├── examples ├── parse-rules │ └── main.go ├── simple-rule-mapping │ ├── data.json │ ├── main.go │ ├── mapping_results.json │ └── windows │ │ └── proc_creation_win_wmic_recon_group.yml ├── simple-streamer │ └── main.go └── threaded-streamer │ └── main.go ├── go.mod ├── go.sum ├── ident.go ├── ident_test.go ├── lexer.go ├── lexer_test.go ├── nodes.go ├── parser.go ├── parser_test.go ├── pattern.go ├── rule.go ├── ruleset.go ├── sigma.go ├── token.go ├── tree.go └── tree_test.go /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test go code 2 | 3 | on: 4 | push: 5 | branches: [ master, next-* ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.18 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test Sigma v2 package 25 | run: go test -v ./... 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-sigma-rule-engine 2 | 3 | > Golang library that implements a sigma log rule parser and match engine. 4 | 5 | [Sigma](https://github.com/Neo23x0/sigma) is a open and vendor-agnostic signature format for logs. Official sigma repository includes rule format definition, a public ruleset, and python tooling for converting rules into various SIEM alert formats. 
Essentially, it fills the same role in logging space as Suricata does in packet capture and YARA for file analysis. However, unlike those projects, the open Sigma project does not act as a match engine. Users are still expected to run a supported SIEM or log management solution, with necessary licencing to enable alerting features. 6 | 7 | This project implements a rule parser and real-time match engine in Golang, to provide a lightweight alternative to those SIEM systems. Essentially, it's just a ~3000 line library that can be used by anyone to build their own IDS for logs. Initial version was experimental hack, cobbled together at the last minute with minimal testing, that was used by Crossed Swords 2020 exercise, organized by NATO CCDCOE. Yellow team log post-processor relied on the engine to detect Red team activities in gamenet targets, in real time. This code is archived into `pkg/sigma/v1`. 8 | 9 | Since then, I rewrote the entire engine to provide a cleaner and more tested version as reference to anyone interested in building their own IDS for logs. This code can be found in `pkg/sigma/v2`. The project also includes a cli application in `cmd/` folder, written with [cobra](https://github.com/spf13/cobra). However, it is meant to be used as reference and testing, rather than a fully-fledged tool. 10 | 11 | # Basic usage 12 | 13 | Simply pull the code using `go get`. 14 | 15 | ``` 16 | go get -u github.com/markuskont/go-sigma-rule-engine/ 17 | ``` 18 | 19 | Then import the library into your project. **PS** - this path was refactored to project root in `0.3`, as initial layout was not suitable to library project. Please update your imports accordingly when upgrading from `0.2`. 
20 | 21 | ```go 22 | import ( 23 | "github.com/markuskont/go-sigma-rule-engine" 24 | ) 25 | ``` 26 | 27 | ```go 28 | ruleset, err := sigma.NewRuleset(sigma.Config{ 29 | Directory: viper.GetStringSlice("rules.dir"), 30 | }) 31 | if err != nil { 32 | return err 33 | } 34 | logrus.Debugf("Found %d files, %d ok, %d failed, %d unsupported", 35 | ruleset.Total, ruleset.Ok, ruleset.Failed, ruleset.Unsupported) 36 | ``` 37 | 38 | Events can then be evaluated against full ruleset. 39 | 40 | ```go 41 | if result, match := ruleset.EvalAll(e); match { 42 | // handle match results here here 43 | } 44 | ``` 45 | 46 | Individual rules could also be manually looped. For example, when early return is desired for avoiding full ruleset evaluation. 47 | 48 | ```go 49 | for _, rule := range ruleset.Rules { 50 | if rule.Match(e) { 51 | // handle rule match here 52 | } 53 | } 54 | ``` 55 | 56 | Note that variable `e` should implement `Event` interface. 57 | 58 | ## Matcher and Event 59 | 60 | Our Sigma rule is built as a tree where each node must satisfy the `Matcher` interface that performs boolean evaluation for events. 61 | 62 | ```go 63 | type Matcher interface { 64 | Match(Event) bool 65 | } 66 | ``` 67 | 68 | There are simply too many possible event formats for our simple Sigma library to handle. Therefore, users are expected to implement `Event` interface for any object that will be matched against the ruleset. This `Event` interface embeds field access methods for two Sigma rule types - `keyword` and `selection`. 
69 | 70 | ```go 71 | // Keyworder implements keywords sigma rule type on arbitrary event 72 | // Should return list of fields that are relevant for rule matching 73 | type Keyworder interface { 74 | // Keywords implements Keyworder 75 | Keywords() ([]string, bool) 76 | } 77 | 78 | // Selector implements selection sigma rule type 79 | type Selector interface { 80 | // Select implements Selector 81 | Select(string) (interface{}, bool) 82 | } 83 | 84 | // Event implements sigma rule types by embedding Keyworder and Selector 85 | // Used by rules to extract relevant fields 86 | type Event interface { 87 | Keyworder 88 | Selector 89 | } 90 | ``` 91 | 92 | [Helper function source file](/pkg/sigma/v2/helpers.go) provides an example for handling dynamic hash maps. 93 | 94 | ### Keywords 95 | 96 | `Keywords` rule type is simply a list of patters that must exist in core message. Only one pattern must match. 97 | 98 | ```yaml 99 | keywords: 100 | - 'wget * - http* | perl' 101 | - 'wget * - http* | sh' 102 | - 'wget * - http* | bash' 103 | - 'python -m SimpleHTTPServer' 104 | ``` 105 | 106 | Thus, the `Keyworder` interface simply returns a list of unstructured fields that could be considered core messages. It is built around `slice` because some event types, like Windows EventLog, could contain multiple fields that might contain this information. And returning a `nil` slice is cleaner than empty string when keyword rule type does not apply to event. However, in that case the second return value should always be `false` to ensure early return when rule does not apply to particular message. 107 | 108 | ```go 109 | type Keyworder interface { 110 | Keywords() ([]string, bool) 111 | } 112 | ``` 113 | 114 | Dynamic JSON objects can be implemented as stub because this rule type does not support key-value lookups. 
115 | 116 | ```go 117 | // Keywords implements Keyworder 118 | func (s DynamicMap) Keywords() ([]string, bool) { 119 | return nil, false 120 | } 121 | ``` 122 | 123 | Alternatively, structs for well-known and standardized messages, such as BSD syslog, might simply return the `Message` field. 124 | 125 | ```go 126 | func (m Syslog) Keywords() ([]string, bool) { 127 | return m.Message.Keywords() 128 | } 129 | ``` 130 | 131 | That message could be a unstructured string that also implements our `Event` interface. 132 | 133 | ```go 134 | type Message string 135 | 136 | func (m Message) Keywords() ([]string, bool) { 137 | return []string{string(m)}, true 138 | } 139 | ``` 140 | 141 | Dynamic structured events, like Suricata EVE, could have well known fields that might qualify as message. 142 | 143 | ```go 144 | func (s DynamicMap) Keywords() ([]string, bool) { 145 | if val, ok := s.Select("alert.signature"); ok { 146 | if str, ok := val.(string); ok { 147 | return []string{str}, true 148 | } 149 | } 150 | return nil, false 151 | } 152 | ``` 153 | 154 | Multiple fields could be extracted and passed to the rule with this method. For example, `payload_printable`, `alert.category`, etc. 155 | 156 | ### Selection 157 | 158 | This rule type is for key-value lookups. 159 | 160 | ```yaml 161 | selection: 162 | winlog.event_data.ScriptBlockText: 163 | - ' -FromBase64String' 164 | ``` 165 | 166 | Like with `keyword`, this rule type might simply may not apply to some events. 167 | 168 | ```go 169 | func (s UnstructuredEvent) Select(key string) (interface{}, bool) { 170 | return nil, false 171 | } 172 | ``` 173 | 174 | Otherwise, dynamic maps might simply implement it as wrapper for key-value lookup. 
175 | 176 | ```go 177 | func (s DynamicMap) Select(key string) (interface{}, bool) { 178 | if val, ok := d[key]; ok { 179 | return val, true 180 | } 181 | return nil, false 182 | } 183 | ``` 184 | 185 | Static structs for well-standardized event formats may simply handle these lookups manually. 186 | 187 | ```go 188 | type Syslog struct { 189 | Timestamp time.Time `json:"@timestamp"` 190 | Host string `json:"host"` 191 | Program string `json:"program"` 192 | Pid int `json:"pid"` 193 | Severity int `json:"severity"` 194 | Facility int `json:"facility"` 195 | Sender net.IP `json:"ip"` 196 | 197 | Message `json:"message"` 198 | } 199 | 200 | func (m Syslog) Select(key string) (interface{}, bool) { 201 | switch key { 202 | case "timestamp", "@timestamp": 203 | return m.Timestamp, true 204 | case "host": 205 | return m.Host, true 206 | case "program": 207 | return m.Program, true 208 | case "pid": 209 | return m.Pid, true 210 | case "severity": 211 | return m.Severity, true 212 | case "facility": 213 | return m.Facility, true 214 | case "sender": 215 | if m.Sender == nil { 216 | return nil, false 217 | } 218 | return m.Sender.String(), true 219 | case "message", "msg": 220 | return m.Keywords(), true 221 | default: 222 | return nil, false 223 | } 224 | } 225 | ``` 226 | 227 | # Performance 228 | 229 | ```go 230 | BenchmarkTreePositive0-12 867567 1363 ns/op 231 | BenchmarkTreePositive1-12 862962 1494 ns/op 232 | BenchmarkTreePositive2-12 795531 1380 ns/op 233 | BenchmarkTreePositive3-12 854679 1393 ns/op 234 | BenchmarkTreePositive4-12 884188 1364 ns/op 235 | BenchmarkTreePositive5-12 809140 1390 ns/op 236 | BenchmarkTreePositive6-12 773706 1410 ns/op 237 | BenchmarkTreeNegative0-12 776173 1385 ns/op 238 | BenchmarkTreeNegative1-12 812887 1481 ns/op 239 | BenchmarkTreeNegative2-12 850477 1401 ns/op 240 | BenchmarkTreeNegative3-12 840723 1390 ns/op 241 | BenchmarkTreeNegative4-12 819126 1417 ns/op 242 | BenchmarkTreeNegative5-12 748514 1416 ns/op 243 | 
BenchmarkTreeNegative6-12 856683 1382 ns/op 244 | ``` 245 | 246 | # Limitations 247 | 248 | **Ruleset is not thread safe**. Nor can it be easily deep-copied due to possible pointers behind interfaces and pattern containers. Each worker thread should instantiate independent ruleset. However, public sigma ruleset only produces about ~500 rules, so overhead is currently trivial. 249 | 250 | **Library is built around distinct rules, rather than entire ruleset**. That means that each rule could run separate map lookups and no data is shared between them. While individual rules are quite efficient, even in current unoptimized form, passing each event thought entire ruleset means traversing hundreds of rules. Thus having significant performance overhead. For example, we measured that passing an ECS formatted Windows EventLog message through all Windows rules in public Sigma ruleset took 4.5 times the amount of time that was otherwise spent on simply decoding the message. 251 | 252 | **Ruleset splitting and pre-filtering must be handled by the user.** Sigma has `logsource` field to indicate which events should be evaluated against a rule. We simply handled this externally, parsing rules into a map of smaller rulesets. So, we had separate rulesets for Syslog, Snoopy, Suricata and EventLog. Logsource field was used to determine which ruleset was executed for event. 253 | 254 | **No support for aggregations or event correlation.** Relatively small amount of Sigma rules use aggregations with `count() > N` or `Near()` keywords. Implementing them in streaming scenario is quite complex, as they require sharing state between messages over sliding window. Thus requiring full event correlation to be implemented. However, this did not fit our present concurrency model where N workers load balance over common message channel and no information is shared between them. Future work. 
255 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "reflect" 7 | ) 8 | 9 | // ErrInvalidRegex contextualizes broken regular expressions presented by the user 10 | type ErrInvalidRegex struct { 11 | Pattern string 12 | Err error 13 | } 14 | 15 | // Error implements error 16 | func (e ErrInvalidRegex) Error() string { 17 | return fmt.Sprintf("/%s/ %s", e.Pattern, e.Err) 18 | } 19 | 20 | // ErrMissingDetection indicates missing detection field 21 | type ErrMissingDetection struct{} 22 | 23 | func (e ErrMissingDetection) Error() string { return "sigma rule is missing detection field" } 24 | 25 | // ErrMissingConditionItem indicates that identifier in condition is missing in detection map 26 | type ErrMissingConditionItem struct { 27 | Key string 28 | } 29 | 30 | func (e ErrMissingConditionItem) Error() string { 31 | return fmt.Sprintf("missing condition identifier %s", e.Key) 32 | } 33 | 34 | // ErrEmptyDetection indicates detection field present but empty 35 | type ErrEmptyDetection struct{} 36 | 37 | func (e ErrEmptyDetection) Error() string { return "sigma rule has detection but is empty" } 38 | 39 | // ErrMissingCondition indicates missing condition field 40 | type ErrMissingCondition struct{} 41 | 42 | func (e ErrMissingCondition) Error() string { return "complex sigma rule is missing condition" } 43 | 44 | // ErrIncompleteDetection indicates a rule has defined identifiers that are missing in detection map 45 | type ErrIncompleteDetection struct { 46 | Condition string 47 | Keys []string 48 | Msg string 49 | } 50 | 51 | func (e ErrIncompleteDetection) Error() string { 52 | return fmt.Sprintf( 53 | "incomplete rule, missing fields from condition. [%s]. Has %+v. 
%s", 54 | e.Condition, 55 | func() []string { 56 | if e.Keys != nil { 57 | return e.Keys 58 | } 59 | return []string{} 60 | }(), 61 | e.Msg, 62 | ) 63 | } 64 | 65 | // ErrUnsupportedToken is a parser error indicating lexical token that is not yet supported 66 | // Meant to be used as informational warning, rather than application breaking error 67 | type ErrUnsupportedToken struct{ Msg string } 68 | 69 | func (e ErrUnsupportedToken) Error() string { return fmt.Sprintf("UNSUPPORTED TOKEN: %s", e.Msg) } 70 | 71 | // ErrWip indicates a rule expression that is currently Work In Progress 72 | // Functions like ErrUnsupportedToken but indicates that feature is under active development 73 | // Non-critical escape hatch while debugging 74 | type ErrWip struct{} 75 | 76 | func (e ErrWip) Error() string { return "work in progress" } 77 | 78 | // ErrParseYaml indicates YAML parsing error 79 | type ErrParseYaml struct { 80 | Path string 81 | Err error 82 | Count int 83 | } 84 | 85 | func (e ErrParseYaml) Error() string { 86 | return fmt.Sprintf("%d - File: %s; Err: %s", e.Count, e.Path, e.Err) 87 | } 88 | 89 | // ErrGotBrokenYamlFiles is a bulk error handler for dealing with broken sigma rules 90 | // Some rules are bound to fail, no reason to exit entire application 91 | // Individual errors can be collected and returned at the end 92 | // Called decides if they should be only reported or it warrants full exit 93 | type ErrBulkParseYaml struct { 94 | Errs []ErrParseYaml 95 | } 96 | 97 | func (e ErrBulkParseYaml) Error() string { 98 | return fmt.Sprintf("got %d broken yaml files", len(e.Errs)) 99 | } 100 | 101 | // ErrInvalidTokenSeq indicates expression syntax error from rule writer 102 | // For example, two indents should be separated by a logical AND / OR operator 103 | type ErrInvalidTokenSeq struct { 104 | Prev, Next Item 105 | Collected []Item 106 | } 107 | 108 | func (e ErrInvalidTokenSeq) Error() string { 109 | return fmt.Sprintf(`seq error after collecting %d 
elements.`+ 110 | ` Invalid token sequence %s -> %s. Values: %s -> %s.`, 111 | len(e.Collected), e.Prev.T, e.Next.T, e.Prev.Val, e.Next.Val) 112 | } 113 | 114 | // ErrIncompleteTokenSeq is invoked when lex channel drain does not end with EOF 115 | // thus indicating incomplete lexing sequence 116 | type ErrIncompleteTokenSeq struct { 117 | Expression string 118 | Items []Item 119 | Last Item 120 | } 121 | 122 | func (e ErrIncompleteTokenSeq) Error() string { 123 | return fmt.Sprintf("last element should be EOF, got token %s with value %s", 124 | e.Last.T.String(), e.Last.Val) 125 | } 126 | 127 | // ErrInvalidKeywordConstruct indicates that parser found a keyword expression 128 | // that did not match any known keyword rule structure 129 | // could be unmarshal issue 130 | type ErrInvalidKeywordConstruct struct { 131 | Msg string 132 | Expr interface{} 133 | } 134 | 135 | func (e ErrInvalidKeywordConstruct) Error() string { 136 | return fmt.Sprintf(`invalid type for parsing keyword expression. `+ 137 | `Should be slice of strings or a funky one element map where value is slice of strings. `+ 138 | `Or other stuff. Got |%+v| with type |%s|`, 139 | e.Expr, reflect.TypeOf(e.Expr).String()) 140 | } 141 | 142 | // ErrInvalidSelectionConstruct indicates that parser found a selection expression 143 | // that did not match any known selection rule structure 144 | // could be unmarshal issue 145 | type ErrInvalidSelectionConstruct struct { 146 | Msg string 147 | Expr interface{} 148 | } 149 | 150 | func (e ErrInvalidSelectionConstruct) Error() string { 151 | return fmt.Sprintf("invalid type for parsing selection expression. 
Got |%+v| with type |%s|", 152 | e.Expr, reflect.TypeOf(e.Expr).String()) 153 | } 154 | 155 | // ErrInvalidKind indicates that type switching function received an unsupported 156 | // or unhandled data type 157 | // Contains the type in question, arbitrary error text and keyword/selection indicator 158 | // Critical is used to indicate if this error should cause an exit or can simply 159 | // be handled as a warning for future improvements 160 | type ErrInvalidKind struct { 161 | reflect.Kind 162 | Msg string 163 | T identType 164 | Critical bool 165 | } 166 | 167 | func (e ErrInvalidKind) Error() string { 168 | return fmt.Sprintf("%s data type error. %s got %s. %s", 169 | func() string { 170 | if e.Critical { 171 | return "CRITICAL" 172 | } 173 | return "Informative" 174 | }(), e.T, e.Kind, e.Msg) 175 | } 176 | 177 | // ErrUnsupportedExpression indicates that rule expression is not yet supported by parser 178 | // mostly a type issue 179 | type ErrUnsupportedExpression struct { 180 | Msg string 181 | T identType 182 | Expr interface{} 183 | Critical bool 184 | } 185 | 186 | func (e ErrUnsupportedExpression) Error() string { 187 | return fmt.Sprintf("%s unsupported expression for %s, %s. %+v", 188 | func() string { 189 | if e.Critical { 190 | return "CRITICAL" 191 | } 192 | return "Informative" 193 | }(), e.T, e.Msg, e.Expr) 194 | } 195 | 196 | // ErrUnableToReflect indicates that kind reflection could not be done, as 197 | // typeOf returned a nil value 198 | // likely a missing pattern 199 | var ErrUnableToReflect = errors.New("unable to reflect on pattern kind") 200 | -------------------------------------------------------------------------------- /examples/parse-rules/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 Markus Kont alias013@gmail.com 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package main 17 | 18 | import ( 19 | "flag" 20 | "log" 21 | "strings" 22 | 23 | "github.com/markuskont/go-sigma-rule-engine" 24 | ) 25 | 26 | type counts struct { 27 | ok, fail, unsupported int 28 | } 29 | 30 | var ( 31 | flagRuleDir = flag.String("rules-dir", "", "Directories containing rules. Multiple can be defined with semicolon as separator.") 32 | ) 33 | 34 | func main() { 35 | flag.Parse() 36 | files, err := sigma.NewRuleFileList(strings.Split(*flagRuleDir, ";")) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | for _, f := range files { 41 | log.Println(f) 42 | } 43 | log.Println("Parsing rule yaml files") 44 | rules, err := sigma.NewRuleList(files, true, false, nil) 45 | if err != nil { 46 | switch err.(type) { 47 | case sigma.ErrBulkParseYaml: 48 | log.Println(err) 49 | default: 50 | log.Fatal(err) 51 | } 52 | } 53 | log.Printf("Got %d rules from yaml\n", len(rules)) 54 | log.Println("Parsing rules into AST") 55 | c := &counts{} 56 | loop: 57 | for _, raw := range rules { 58 | log.Print(raw.Path) 59 | if raw.Multipart { 60 | c.unsupported++ 61 | continue loop 62 | } 63 | _, err := sigma.NewTree(raw) 64 | if err != nil { 65 | switch err.(type) { 66 | case sigma.ErrUnsupportedToken: 67 | c.unsupported++ 68 | log.Printf("%s: %s\n", err, raw.Path) 69 | default: 70 | c.fail++ 71 | log.Printf("%s\n", err) 72 | } 73 | } else { 74 | log.Printf("%s: ok\n", raw.Path) 75 | c.ok++ 76 | } 77 | } 78 | log.Printf("OK: %d; FAIL: %d; UNSUPPORTED: %d\n", c.ok, c.fail, c.unsupported) 79 | } 80 | 
-------------------------------------------------------------------------------- /examples/simple-rule-mapping/data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "CommandLine":"powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 4 | "Company":"Microsoft Corporation", 5 | "CurrentDirectory":"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 6 | "Description":"Windows PowerShell", 7 | "EventRecordID":"35114", 8 | "FileVersion":"10.0.19041.546 (WinBuild.160101.0800)", 9 | "Hashes":"SHA256=9F914D42706FE215501044ACD85A32D58AAEF1419D404FDDFA5D3B48F66CCD9F", 10 | "Image":"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 11 | "IntegrityLevel":"High", 12 | "LogonGuid":"{fbe589a2-781c-63f8-ec89-070000000000}", 13 | "LogonId":"0x00000000000789ec", 14 | "OriginalFileName":"PowerShell.EXE", 15 | "ParentCommandLine":"\"C:\\Users\\Public\\splunkd.exe\" -server http://192.168.1.5:8888 -group red", 16 | "ParentImage":"C:\\Users\\Public\\splunkd.exe", 17 | "ParentProcessGuid":"{fbe589a2-ff98-63fe-150e-000000001200}", 18 | "ParentProcessId":"5484", 19 | "ParentUser":"DESKTOP-IDQQB81\\victim1", 20 | "ProcessGuid":"{fbe589a2-4f85-6401-0c1a-000000001200}", 21 | "ProcessId":"1264","Product":"Microsoft® Windows® Operating System", 22 | "RuleName":"-", 23 | "TerminalSessionId":"1", 24 | "User":"DESKTOP-IDQQB81\\victim1", 25 | "UtcTime":"2023-03-03 01:38:13.179" 26 | }, 27 | { 28 | "CommandLine":"\"C:\\Windows\\System32\\Wbem\\WMIC.exe\" group get name", 29 | "Company":"Microsoft Corporation", 30 | "CurrentDirectory":"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 31 | "Description":"WMI Commandline Utility", 32 | "EventRecordID":"35115", 33 | "FileVersion":"10.0.19041.1741 (WinBuild.160101.0800)", 34 | "Hashes":"SHA256=12ABB45620A7A1FFD8BB953DEBA3FCC30B8BA14B2FF523F1F519BF2BF6BA7D4C", 35 | 
"Image":"C:\\Windows\\System32\\wbem\\WMIC.exe", 36 | "IntegrityLevel":"High", 37 | "LogonGuid":"{fbe589a2-781c-63f8-ec89-070000000000}", 38 | "LogonId":"0x00000000000789ec", 39 | "OriginalFileName":"wmic.exe", 40 | "ParentCommandLine":"powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 41 | "ParentImage":"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 42 | "ParentProcessGuid":"{fbe589a2-4f85-6401-0c1a-000000001200}", 43 | "ParentProcessId":"1264","ParentUser":"DESKTOP-IDQQB81\\victim1", 44 | "ProcessGuid":"{fbe589a2-4f85-6401-0d1a-000000001200}", 45 | "ProcessId":"5572", 46 | "Product":"Microsoft® Windows® Operating System", 47 | "RuleName":"-", 48 | "TerminalSessionId":"1", 49 | "User":"DESKTOP-IDQQB81\\victim1", 50 | "UtcTime":"2023-03-03 01:38:13.584" 51 | } 52 | ] -------------------------------------------------------------------------------- /examples/simple-rule-mapping/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "io/ioutil" 7 | "log" 8 | "os" 9 | "strings" 10 | 11 | "github.com/markuskont/datamodels" 12 | "github.com/markuskont/go-sigma-rule-engine" 13 | ) 14 | 15 | var ( 16 | flagRuleSetPath = flag.String("path-ruleset", "./windows/", "Root folders for Sigma rules. Semicolon delimits paths.") 17 | ) 18 | 19 | func saveJSONToFile(filename string, data []interface{}) error { 20 | var jsonData []byte 21 | for _, d := range data { 22 | dJSON, err := json.Marshal(d) 23 | if err != nil { 24 | return err 25 | } 26 | jsonData = append(jsonData, dJSON...) 
27 | jsonData = append(jsonData, '\n') 28 | } 29 | return ioutil.WriteFile(filename, jsonData, 0644) 30 | } 31 | func main() { 32 | 33 | log.Println("start job") 34 | flag.Parse() 35 | 36 | if *flagRuleSetPath == "" { 37 | log.Fatal("ruleset path not configured") 38 | } 39 | 40 | ruleset, err := sigma.NewRuleset(sigma.Config{ 41 | Directory: strings.Split(*flagRuleSetPath, ";"), 42 | NoCollapseWS: false, 43 | FailOnRuleParse: false, 44 | FailOnYamlParse: false, 45 | }, nil) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | 50 | data, err := ioutil.ReadFile("./data.json") 51 | 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | 56 | var events []map[string]interface{} 57 | 58 | if err := json.Unmarshal([]byte(data), &events); err != nil { 59 | panic(err) 60 | } 61 | cnt := 0 62 | hit := 0 63 | sigmaResults := []interface{}{} 64 | for _, event := range events { 65 | 66 | jsonStr, err := json.Marshal(event) 67 | 68 | if err != nil { 69 | log.Println(err) 70 | } 71 | 72 | var obj datamodels.Map 73 | if err := json.Unmarshal(jsonStr, &obj); err != nil { 74 | log.Println(err) 75 | } 76 | 77 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 78 | obj["sigma_results"] = results 79 | if err != nil { 80 | log.Println(err) 81 | } 82 | sigmaResults = append(sigmaResults, obj) 83 | 84 | hit += 1 85 | } 86 | cnt += 1 87 | 88 | } 89 | log.Println("total dataset : ", cnt) 90 | log.Println("total hit rule : ", hit) 91 | file, err := os.Create("./mapping_results.json") 92 | encoder := json.NewEncoder(file) 93 | encoder.SetIndent("", " ") 94 | encoder.Encode(sigmaResults) 95 | } 96 | -------------------------------------------------------------------------------- /examples/simple-rule-mapping/mapping_results.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "CommandLine": "\"C:\\Windows\\System32\\Wbem\\WMIC.exe\" group get name", 4 | "Company": "Microsoft Corporation", 5 | "CurrentDirectory": 
"C:\\Users\\victim1\\Downloads\\winlogbeat-7.5.2-windows-x86_64\\winlogbeat-7.17.9-windows-x86_64\\", 6 | "Description": "WMI Commandline Utility", 7 | "EventRecordID": "35115", 8 | "FileVersion": "10.0.19041.1741 (WinBuild.160101.0800)", 9 | "Hashes": "SHA256=12ABB45620A7A1FFD8BB953DEBA3FCC30B8BA14B2FF523F1F519BF2BF6BA7D4C", 10 | "Image": "C:\\Windows\\System32\\wbem\\WMIC.exe", 11 | "IntegrityLevel": "High", 12 | "LogonGuid": "{fbe589a2-781c-63f8-ec89-070000000000}", 13 | "LogonId": "0x00000000000789ec", 14 | "OriginalFileName": "wmic.exe", 15 | "ParentCommandLine": "powershell.exe -ExecutionPolicy Bypass -C \"wmic.exe group get name\"", 16 | "ParentImage": "C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", 17 | "ParentProcessGuid": "{fbe589a2-4f85-6401-0c1a-000000001200}", 18 | "ParentProcessId": "1264", 19 | "ParentUser": "DESKTOP-IDQQB81\\victim1", 20 | "ProcessGuid": "{fbe589a2-4f85-6401-0d1a-000000001200}", 21 | "ProcessId": "5572", 22 | "Product": "Microsoft® Windows® Operating System", 23 | "RuleName": "-", 24 | "TerminalSessionId": "1", 25 | "User": "DESKTOP-IDQQB81\\victim1", 26 | "UtcTime": "2023-03-03 01:38:13.584", 27 | "sigma_results": [ 28 | { 29 | "tags": [ 30 | "attack.discovery", 31 | "attack.t1069.001" 32 | ], 33 | "id": "164eda96-11b2-430b-85ff-6a265c15bf32", 34 | "title": "Local Groups Reconnaissance Via Wmic.EXE", 35 | "description": "Detects the execution of \"wmic\" with the \"group\" flag.\nAdversaries may attempt to find local system groups and permission settings.\nThe knowledge of local system permission groups can help adversaries determine which groups exist and which users belong to a particular group.\nAdversaries may use this information to determine which users have elevated permissions, such as the users found within the local administrators group.\n" 36 | } 37 | ] 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- 
/examples/simple-rule-mapping/windows/proc_creation_win_wmic_recon_group.yml: -------------------------------------------------------------------------------- 1 | title: Local Groups Reconnaissance Via Wmic.EXE 2 | id: 164eda96-11b2-430b-85ff-6a265c15bf32 3 | status: experimental 4 | description: | 5 | Detects the execution of "wmic" with the "group" flag. 6 | Adversaries may attempt to find local system groups and permission settings. 7 | The knowledge of local system permission groups can help adversaries determine which groups exist and which users belong to a particular group. 8 | Adversaries may use this information to determine which users have elevated permissions, such as the users found within the local administrators group. 9 | references: 10 | - https://github.com/redcanaryco/atomic-red-team/blob/f339e7da7d05f6057fdfcdd3742bfcf365fee2a9/atomics/T1069.001/T1069.001.md 11 | author: frack113 12 | date: 2021/12/12 13 | modified: 2023/02/14 14 | tags: 15 | - attack.discovery 16 | - attack.t1069.001 17 | logsource: 18 | product: windows 19 | category: process_creation 20 | detection: 21 | selection_img: 22 | - Image|endswith: '\wmic.exe' 23 | - OriginalFileName: 'wmic.exe' 24 | selection_cli: 25 | CommandLine|contains: ' group' 26 | condition: all of selection* 27 | falsepositives: 28 | - Unknown 29 | level: low 30 | -------------------------------------------------------------------------------- /examples/simple-streamer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "flag" 7 | "log" 8 | "os" 9 | "strings" 10 | 11 | "github.com/markuskont/datamodels" 12 | "github.com/markuskont/go-sigma-rule-engine" 13 | ) 14 | 15 | var ( 16 | flagRuleSetPath = flag.String("path-ruleset", "", "Root folders for Sigma rules. 
Semicolon delimits paths.") 17 | ) 18 | 19 | func main() { 20 | flag.Parse() 21 | if *flagRuleSetPath == "" { 22 | log.Fatal("ruleset path not configured") 23 | } 24 | ruleset, err := sigma.NewRuleset(sigma.Config{ 25 | Directory: strings.Split(*flagRuleSetPath, ";"), 26 | NoCollapseWS: false, 27 | FailOnRuleParse: false, 28 | FailOnYamlParse: false, 29 | }, nil) 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | scanner := bufio.NewScanner(bufio.NewReader(os.Stdin)) 34 | output := os.Stdout 35 | loop: 36 | for scanner.Scan() { 37 | var obj datamodels.Map 38 | if err := json.Unmarshal(scanner.Bytes(), &obj); err != nil { 39 | log.Println(err) 40 | continue loop 41 | } 42 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 43 | obj["sigma_results"] = results 44 | encoded, err := json.Marshal(obj) 45 | if err != nil { 46 | log.Println(err) 47 | continue loop 48 | } 49 | output.Write(append(encoded, []byte("\n")...)) 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/threaded-streamer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "flag" 7 | "log" 8 | "os" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/markuskont/datamodels" 13 | "github.com/markuskont/go-sigma-rule-engine" 14 | ) 15 | 16 | var ( 17 | flagRuleSetPath = flag.String("path-ruleset", "", "Root folders for Sigma rules. 
Semicolon delimits paths.") 18 | flagWorkers = flag.Int("workers", 4, "Number of async workers") 19 | ) 20 | 21 | func main() { 22 | flag.Parse() 23 | if *flagRuleSetPath == "" { 24 | log.Fatal("ruleset path not configured") 25 | } 26 | if *flagWorkers <= 0 { 27 | log.Fatal("invalid worker count") 28 | } 29 | 30 | // ruleset setup 31 | ruleset, err := sigma.NewRuleset(sigma.Config{ 32 | Directory: strings.Split(*flagRuleSetPath, ";"), 33 | NoCollapseWS: false, 34 | FailOnRuleParse: false, 35 | FailOnYamlParse: false, 36 | }, nil) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | // syncing setup 42 | var wg sync.WaitGroup 43 | defer wg.Wait() 44 | ch := make(chan []byte, *flagWorkers) 45 | 46 | // workers setup 47 | for i := 0; i < *flagWorkers; i++ { 48 | wg.Add(1) 49 | go func() { 50 | defer wg.Done() 51 | output := os.Stdout 52 | loop: 53 | for data := range ch { 54 | var obj datamodels.Map 55 | if err := json.Unmarshal(data, &obj); err != nil { 56 | log.Println(err) 57 | continue loop 58 | } 59 | if results, ok := ruleset.EvalAll(obj); ok && len(results) > 0 { 60 | obj["sigma_results"] = results 61 | encoded, err := json.Marshal(obj) 62 | if err != nil { 63 | log.Println(err) 64 | continue loop 65 | } 66 | output.Write(append(encoded, []byte("\n")...)) 67 | } 68 | } 69 | }() 70 | } 71 | 72 | // scanner setup 73 | wg.Add(1) 74 | go func() { 75 | defer wg.Done() 76 | defer close(ch) 77 | scanner := bufio.NewScanner(bufio.NewReader(os.Stdin)) 78 | for scanner.Scan() { 79 | // need to copy the bytes as scanner.Bytes is modified in place 80 | cpy := make([]byte, len(scanner.Bytes())) 81 | copy(cpy, scanner.Bytes()) 82 | ch <- cpy 83 | } 84 | }() 85 | } 86 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/markuskont/go-sigma-rule-engine 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/gobwas/glob v0.2.3 7 | 
github.com/markuskont/datamodels v0.0.1 8 | gopkg.in/yaml.v2 v2.4.0 9 | ) 10 | 11 | require ( 12 | github.com/kr/text v0.2.0 // indirect 13 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 14 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 3 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 4 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 5 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 6 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 7 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 8 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 9 | github.com/markuskont/datamodels v0.0.1 h1:Pibmdtfp4hTypvmFmmCPIkSPxUZ6rpi/myd8U9F/5y4= 10 | github.com/markuskont/datamodels v0.0.1/go.mod h1:dyie+4X2Pmask9qB6PS89+Xq6v0Hjm+anprlucH1JcA= 11 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 12 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 14 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 16 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 17 | gopkg.in/check.v1 
v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 18 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 19 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 20 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 21 | -------------------------------------------------------------------------------- /ident.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "reflect" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | type identType int 12 | 13 | func (i identType) String() string { 14 | switch i { 15 | case identKeyword: 16 | return "KEYWORD" 17 | case identSelection: 18 | return "SELECTION" 19 | default: 20 | return "UNK" 21 | } 22 | } 23 | 24 | const ( 25 | identErr identType = iota 26 | identSelection 27 | identKeyword 28 | ) 29 | 30 | func checkIdentType(name string, data interface{}) identType { 31 | t := reflectIdentKind(data) 32 | if strings.HasPrefix(name, "keyword") { 33 | if data == nil { 34 | return identKeyword 35 | } 36 | if t != identKeyword { 37 | return identErr 38 | } 39 | } 40 | return t 41 | } 42 | 43 | func reflectIdentKind(data interface{}) identType { 44 | switch v := data.(type) { 45 | case map[string]interface{}, map[interface{}]interface{}: 46 | return identSelection 47 | case []interface{}: 48 | k, ok := isSameKind(v) 49 | if !ok { 50 | return identErr 51 | } 52 | switch k { 53 | case reflect.Map: 54 | return identSelection 55 | default: 56 | return identKeyword 57 | } 58 | default: 59 | return identKeyword 60 | } 61 | } 62 | 63 | func newRuleFromIdent(rule interface{}, kind identType, noCollapseWS bool) (Branch, error) { 64 | switch kind { 65 | case identKeyword: 66 | return NewKeyword(rule, noCollapseWS) 67 | case identSelection: 68 | return NewSelectionBranch(rule, noCollapseWS) 69 | } 70 | return nil, 
fmt.Errorf("unknown rule kind, should be keyword or selection") 71 | } 72 | 73 | // Keyword is a container for patterns joined by logical disjunction 74 | type Keyword struct { 75 | S StringMatcher 76 | stats 77 | } 78 | 79 | // Match implements Matcher 80 | func (k Keyword) Match(msg Event) (bool, bool) { 81 | msgs, ok := msg.Keywords() 82 | if !ok { 83 | return false, false 84 | } 85 | for _, m := range msgs { 86 | if k.S.StringMatch(m) { 87 | return true, true 88 | } 89 | } 90 | return false, true 91 | } 92 | 93 | func NewKeyword(expr interface{}, noCollapseWS bool) (*Keyword, error) { 94 | switch val := expr.(type) { 95 | case []string: 96 | return newStringKeyword(TextPatternKeyword, false, noCollapseWS, val...) 97 | case []interface{}: 98 | k, ok := isSameKind(val) 99 | if !ok { 100 | return nil, ErrInvalidKind{ 101 | Kind: reflect.Array, 102 | T: identKeyword, 103 | Critical: false, 104 | Msg: "mixed type slice", 105 | } 106 | } 107 | switch v := k; { 108 | case v == reflect.String: 109 | return newStringKeyword(TextPatternKeyword, false, noCollapseWS, castIfaceToString(val)...) 110 | default: 111 | return nil, ErrInvalidKind{ 112 | Kind: v, 113 | T: identKeyword, 114 | Critical: false, 115 | Msg: "unsupported data type", 116 | } 117 | } 118 | 119 | default: 120 | // TODO 121 | return nil, ErrInvalidKeywordConstruct{Expr: expr} 122 | } 123 | } 124 | 125 | func newStringKeyword(mod TextPatternModifier, lower, noCollapseWS bool, patterns ...string) (*Keyword, error) { 126 | matcher, err := NewStringMatcher(mod, lower, false, noCollapseWS, patterns...) 
127 | if err != nil { 128 | return nil, err 129 | } 130 | return &Keyword{S: matcher}, nil 131 | } 132 | 133 | type SelectionNumItem struct { 134 | Key string 135 | Pattern NumMatcher 136 | } 137 | 138 | type SelectionStringItem struct { 139 | Key string 140 | Pattern StringMatcher 141 | } 142 | 143 | type Selection struct { 144 | N []SelectionNumItem 145 | S []SelectionStringItem 146 | stats 147 | } 148 | 149 | // Match implements Matcher 150 | // TODO - numeric and boolean pattern match 151 | func (s Selection) Match(msg Event) (bool, bool) { 152 | for _, v := range s.N { 153 | val, ok := msg.Select(v.Key) 154 | if !ok { 155 | return false, false 156 | } 157 | switch vt := val.(type) { 158 | case string: 159 | n, err := strconv.Atoi(vt) 160 | if err != nil { 161 | // TODO - better debugging 162 | return false, true 163 | } 164 | if !v.Pattern.NumMatch(n) { 165 | return false, true 166 | } 167 | case json.Number: 168 | n, err := vt.Int64() 169 | if err != nil { 170 | // TODO - better debugging 171 | return false, true 172 | } 173 | if !v.Pattern.NumMatch(int(n)) { 174 | return false, true 175 | } 176 | case float64: 177 | // JSON numbers are all by spec float64 values 178 | if !v.Pattern.NumMatch(int(vt)) { 179 | return false, true 180 | } 181 | case int: 182 | // JSON numbers are all by spec float64 values 183 | if !v.Pattern.NumMatch(vt) { 184 | return false, true 185 | } 186 | case int64: 187 | // JSON numbers are all by spec float64 values 188 | if !v.Pattern.NumMatch(int(vt)) { 189 | return false, true 190 | } 191 | case int32: 192 | // JSON numbers are all by spec float64 values 193 | if !v.Pattern.NumMatch(int(vt)) { 194 | return false, true 195 | } 196 | case uint: 197 | // JSON numbers are all by spec float64 values 198 | if !v.Pattern.NumMatch(int(vt)) { 199 | return false, true 200 | } 201 | case uint32: 202 | // JSON numbers are all by spec float64 values 203 | if !v.Pattern.NumMatch(int(vt)) { 204 | return false, true 205 | } 206 | case uint64: 207 | 
// JSON numbers are all by spec float64 values 208 | if !v.Pattern.NumMatch(int(vt)) { 209 | return false, true 210 | } 211 | } 212 | } 213 | for _, v := range s.S { 214 | val, ok := msg.Select(v.Key) 215 | if !ok { 216 | return false, false 217 | } 218 | switch vt := val.(type) { 219 | case string: 220 | if !v.Pattern.StringMatch(vt) { 221 | return false, true 222 | } 223 | case json.Number: 224 | if !v.Pattern.StringMatch(vt.String()) { 225 | return false, true 226 | } 227 | case float64: 228 | // TODO - tmp hack that also loses floating point accuracy 229 | if !v.Pattern.StringMatch(strconv.Itoa(int(vt))) { 230 | return false, true 231 | } 232 | default: 233 | s.incrementMismatchCount() 234 | return false, true 235 | } 236 | } 237 | return true, true 238 | } 239 | 240 | func (s *Selection) incrementMismatchCount() *Selection { 241 | s.stats.TypeMismatchCount++ 242 | return s 243 | } 244 | 245 | func newSelectionFromMap(expr map[string]interface{}, noCollapseWS bool) (*Selection, error) { 246 | sel := &Selection{S: make([]SelectionStringItem, 0)} 247 | for key, pattern := range expr { 248 | var mod TextPatternModifier 249 | var all bool 250 | if strings.Contains(key, "|") { 251 | bits := strings.Split(key, "|") 252 | // allow support for longer chaining later on; simplifies specifier validation as well (I think) 253 | for _, curBit := range bits[1:] { 254 | // excepting 'all', the supported modifiers are mutually exclusive; last one wins 255 | switch curBit { 256 | case "startswith": 257 | mod = TextPatternPrefix 258 | case "endswith": 259 | mod = TextPatternSuffix 260 | case "re": 261 | mod = TextPatternRegex // this is really a type, not a transformation per spec 262 | case "contains": 263 | mod = TextPatternContains 264 | case "all": 265 | all = true 266 | default: 267 | return nil, fmt.Errorf("selection key %s specifier %s invalid", 268 | key, curBit) 269 | } 270 | } 271 | // strip off the specifier from the key so we can look it up correctly 272 | key = 
bits[0] 273 | } 274 | switch pat := pattern.(type) { 275 | case string: 276 | m, err := NewStringMatcher(mod, false, all, noCollapseWS, pat) 277 | if err != nil { 278 | return nil, err 279 | } 280 | sel.S = append(sel.S, SelectionStringItem{Key: key, Pattern: m}) 281 | case int: 282 | m, err := NewNumMatcher(pat) 283 | if err != nil { 284 | return nil, err 285 | } 286 | sel.N = func() []SelectionNumItem { 287 | item := SelectionNumItem{ 288 | Key: key, Pattern: m, 289 | } 290 | if sel.N == nil { 291 | sel.N = []SelectionNumItem{item} 292 | } 293 | return append(sel.N, item) 294 | }() 295 | case []interface{}: 296 | // TODO - move this part to separate function and reuse in NewKeyword 297 | k, ok := isSameKind(pat) 298 | if !ok { 299 | return nil, ErrInvalidKind{ 300 | Kind: reflect.Array, 301 | T: identKeyword, 302 | Critical: false, 303 | Msg: "mixed type slice", 304 | } 305 | } 306 | switch k { 307 | case reflect.String: 308 | m, err := NewStringMatcher(mod, false, all, noCollapseWS, castIfaceToString(pat)...) 309 | if err != nil { 310 | return nil, err 311 | } 312 | sel.S = append(sel.S, SelectionStringItem{Key: key, Pattern: m}) 313 | case reflect.Int: 314 | m, err := NewNumMatcher(castIfaceToInt(pat)...) 
315 | if err != nil { 316 | return nil, err 317 | } 318 | sel.N = func() []SelectionNumItem { 319 | item := SelectionNumItem{ 320 | Key: key, Pattern: m, 321 | } 322 | if sel.N == nil { 323 | sel.N = []SelectionNumItem{item} 324 | } 325 | return append(sel.N, item) 326 | }() 327 | default: 328 | return nil, ErrInvalidKind{ 329 | Kind: k, 330 | T: identKeyword, 331 | Critical: false, 332 | Msg: "unsupported data type", 333 | } 334 | } 335 | default: 336 | if t := reflect.TypeOf(pattern); t != nil { 337 | return nil, ErrInvalidKind{ 338 | Kind: t.Kind(), 339 | T: identSelection, 340 | Critical: true, 341 | Msg: "unsupported selection value", 342 | } 343 | } 344 | return nil, ErrUnableToReflect 345 | } 346 | } 347 | return sel, nil 348 | } 349 | 350 | func NewSelectionBranch(expr interface{}, noCollapseWS bool) (Branch, error) { 351 | switch v := expr.(type) { 352 | case []interface{}: 353 | selections := make([]Branch, 0) 354 | for _, item := range v { 355 | b, err := NewSelectionBranch(item, noCollapseWS) 356 | if err != nil { 357 | return nil, err 358 | } 359 | selections = append(selections, b) 360 | } 361 | return NodeSimpleOr(selections).Reduce(), nil 362 | case map[interface{}]interface{}: 363 | return newSelectionFromMap(cleanUpInterfaceMap(v), noCollapseWS) 364 | default: 365 | return nil, ErrInvalidKind{ 366 | Kind: reflect.TypeOf(expr).Kind(), 367 | T: identSelection, 368 | Critical: true, 369 | Msg: "unsupported selection root container", 370 | } 371 | } 372 | } 373 | 374 | func isSameKind(data []interface{}) (reflect.Kind, bool) { 375 | var current, last reflect.Kind 376 | for i, d := range data { 377 | cType := reflect.TypeOf(d) 378 | if cType == nil { 379 | return reflect.Invalid, false 380 | } 381 | current = cType.Kind() 382 | if i > 0 { 383 | if current != last { 384 | return current, false 385 | } 386 | } 387 | last = current 388 | } 389 | return current, true 390 | } 391 | 392 | func castIfaceToString(items []interface{}) []string { 393 | tx := 
make([]string, 0) 394 | for _, val := range items { 395 | tx = append(tx, fmt.Sprintf("%v", val)) 396 | } 397 | return tx 398 | } 399 | 400 | func castIfaceToInt(items []interface{}) []int { 401 | tx := make([]int, 0) 402 | for _, val := range items { 403 | if n, ok := val.(int); ok { 404 | tx = append(tx, n) 405 | } 406 | } 407 | return tx 408 | } 409 | 410 | // Yaml can have non-string keys, so go-yaml unmarshals to map[interface{}]interface{} 411 | // really annoying 412 | func cleanUpInterfaceMap(rx map[interface{}]interface{}) map[string]interface{} { 413 | tx := make(map[string]interface{}) 414 | for k, v := range rx { 415 | tx[fmt.Sprintf("%v", k)] = v 416 | } 417 | return tx 418 | } 419 | 420 | // stats holds various rule statistics 421 | type stats struct { 422 | TypeMismatchCount uint64 423 | } 424 | -------------------------------------------------------------------------------- /ident_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/markuskont/datamodels" 9 | "gopkg.in/yaml.v2" 10 | ) 11 | 12 | type identExampleType int 13 | 14 | const ( 15 | ident1 identExampleType = iota 16 | ident2 17 | ) 18 | 19 | type identPosNegCases struct { 20 | Pos, Neg []Event 21 | } 22 | 23 | type identTestCase struct { 24 | IdentCount int 25 | IdentTypes []identType 26 | Rule string 27 | Pos, Neg []string 28 | 29 | Example identExampleType 30 | } 31 | 32 | func (i identTestCase) sigma() (*identPosNegCases, error) { 33 | posContainer := make([]Event, 0) 34 | negContainer := make([]Event, 0) 35 | switch i.Example { 36 | case ident1: 37 | if i.Pos == nil || len(i.Pos) == 0 { 38 | return nil, fmt.Errorf("missing positive test cases") 39 | } 40 | for _, c := range i.Pos { 41 | var obj simpleKeywordAuditEventExample1 42 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 43 | return nil, err 44 | } 45 | posContainer = 
append(posContainer, obj) 46 | } 47 | for _, c := range i.Neg { 48 | var obj simpleKeywordAuditEventExample1 49 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 50 | return nil, err 51 | } 52 | negContainer = append(negContainer, obj) 53 | } 54 | return &identPosNegCases{Pos: posContainer, Neg: negContainer}, nil 55 | case ident2: 56 | if i.Pos == nil || len(i.Pos) == 0 { 57 | return nil, fmt.Errorf("missing positive test cases") 58 | } 59 | for _, c := range i.Pos { 60 | var obj datamodels.Map 61 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 62 | return nil, err 63 | } 64 | posContainer = append(posContainer, obj) 65 | } 66 | if i.Neg == nil || len(i.Neg) == 0 { 67 | return nil, fmt.Errorf("missing negative test cases") 68 | } 69 | for _, c := range i.Neg { 70 | var obj datamodels.Map 71 | if err := json.Unmarshal([]byte(c), &obj); err != nil { 72 | return nil, err 73 | } 74 | negContainer = append(negContainer, obj) 75 | } 76 | return &identPosNegCases{Pos: posContainer, Neg: negContainer}, nil 77 | } 78 | return nil, fmt.Errorf("Unknown identifier test case") 79 | } 80 | 81 | type simpleKeywordAuditEventExample1 struct { 82 | Command string `json:"cmd"` 83 | } 84 | 85 | // Keywords implements Keyworder 86 | func (s simpleKeywordAuditEventExample1) Keywords() ([]string, bool) { 87 | return []string{s.Command}, true 88 | } 89 | 90 | // Select implements Selector 91 | func (s simpleKeywordAuditEventExample1) Select(_ string) (interface{}, bool) { 92 | return nil, false 93 | } 94 | 95 | var identSelection1 = ` 96 | --- 97 | detection: 98 | condition: selection 99 | selection: 100 | winlog.event_data.ScriptBlockText|contains: 101 | - ' -FromBase64String' 102 | - '::FromBase64String' 103 | ` 104 | 105 | var identSelection1pos1 = ` 106 | { 107 | "event_id": 4104, 108 | "channel": "Microsoft-Windows-PowerShell/Operational", 109 | "task": "Execute a Remote Command", 110 | "opcode": "On create calls", 111 | "version": 1, 112 | "record_id": 1559, 113 | 
"winlog": { 114 | "event_data": { 115 | "MessageNumber": "1", 116 | "MessageTotal": "1", 117 | "ScriptBlockText": "$s=New-Object IO.MemoryStream(,[Convert]::FromBase64String(\"OMITTED BASE64 STRING\"));", 118 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 119 | } 120 | } 121 | } 122 | ` 123 | 124 | // another command 125 | var identSelection1neg1 = ` 126 | { 127 | "event_id": 4104, 128 | "channel": "Microsoft-Windows-PowerShell/Operational", 129 | "task": "Execute a Remote Command", 130 | "opcode": "On create calls", 131 | "version": 1, 132 | "record_id": 1559, 133 | "winlog": { 134 | "event_data": { 135 | "MessageNumber": "1", 136 | "MessageTotal": "1", 137 | "ScriptBlockText": "Some awesome command", 138 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 139 | } 140 | } 141 | } 142 | ` 143 | 144 | // missing field 145 | var identSelection1neg2 = ` 146 | { 147 | "event_id": 4104, 148 | "channel": "Microsoft-Windows-PowerShell/Operational", 149 | "task": "Execute a Remote Command", 150 | "opcode": "On create calls", 151 | "version": 1, 152 | "record_id": 1559, 153 | "winlog": { 154 | "event_data": { 155 | "MessageNumber": "1", 156 | "MessageTotal": "1", 157 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 158 | } 159 | } 160 | } 161 | ` 162 | 163 | var identKeyword1 = ` 164 | --- 165 | detection: 166 | condition: keywords 167 | keywords: 168 | - 'bash -c' 169 | - 'cat /etc/shadow' 170 | ` 171 | 172 | var identKeyword1pos1 = ` 173 | { "cmd": "sudo bash -c \"cat /etc/shadow /etc/group /etc/passwd\"" } 174 | ` 175 | 176 | var identKeyword1neg1 = ` 177 | { "cmd": "sh -c \"cat /etc/resolv.conf\"" } 178 | ` 179 | 180 | var identKeyword2 = ` 181 | --- 182 | detection: 183 | condition: keywords 184 | keywords: 185 | - 'wget * - http* | perl' 186 | - 'wget * - http* | sh' 187 | - 'wget * - http* | bash' 188 | - "*python -m Simple*Server" 189 | ` 190 | 191 | var identKeyword2pos1 = ` 192 | { "cmd": "/usr/bin/python -m SimpleHTTPServer" } 193 | ` 
194 | 195 | var identKeyword2neg1 = ` 196 | { "cmd": "/usr/bin/python -m pip install --user pip" } 197 | ` 198 | 199 | var identKeyword3 = ` 200 | --- 201 | detection: 202 | condition: keywords 203 | keywords: 204 | - '/\S+python.* -m Simple\w+Server.*/' 205 | ` 206 | 207 | var identSelection2 = ` 208 | --- 209 | detection: 210 | condition: selection 211 | selection: 212 | event_id: 213 | - 8888 214 | - 1337 215 | - 13 216 | ` 217 | 218 | var identSelection3 = ` 219 | --- 220 | detection: 221 | condition: selection 222 | selection: 223 | event_id: 1337 224 | ` 225 | 226 | var identSelection2pos1 = ` 227 | { 228 | "event_id": 1337, 229 | "channel": "Microsoft-Windows-PowerShell/Operational", 230 | "task": "Execute a Remote Command", 231 | "opcode": "On create calls", 232 | "version": 1, 233 | "record_id": 1559, 234 | "winlog": { 235 | "event_data": { 236 | "MessageNumber": "1", 237 | "MessageTotal": "1", 238 | "ScriptBlockText": "Some awesome command", 239 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 240 | } 241 | } 242 | } 243 | ` 244 | 245 | var identSelection2neg1 = ` 246 | { 247 | "event_id": 4104, 248 | "channel": "Microsoft-Windows-PowerShell/Operational", 249 | "task": "Execute a Remote Command", 250 | "opcode": "On create calls", 251 | "version": 1, 252 | "record_id": 1559, 253 | "winlog": { 254 | "event_data": { 255 | "MessageNumber": "1", 256 | "MessageTotal": "1", 257 | "ScriptBlockText": "Some awesome command", 258 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 259 | } 260 | } 261 | } 262 | ` 263 | 264 | var identSelection2neg2 = ` 265 | { 266 | "channel": "Microsoft-Windows-PowerShell/Operational", 267 | "task": "Execute a Remote Command", 268 | "opcode": "On create calls", 269 | "version": 1, 270 | "record_id": 1559, 271 | "winlog": { 272 | "event_data": { 273 | "MessageNumber": "1", 274 | "MessageTotal": "1", 275 | "ScriptBlockText": "Some awesome command", 276 | "ScriptBlockId": "ecbb39e8-1896-41be-b1db-9a33ed76314b" 277 | } 
278 | } 279 | } 280 | ` 281 | 282 | var selectionCases = []identTestCase{ 283 | { 284 | IdentCount: 1, 285 | Rule: identSelection1, 286 | IdentTypes: []identType{identSelection}, 287 | Pos: []string{identSelection1pos1}, 288 | Neg: []string{identSelection1neg1, identSelection1neg2}, 289 | Example: ident2, 290 | }, 291 | { 292 | IdentCount: 1, 293 | Rule: identSelection2, 294 | IdentTypes: []identType{identSelection}, 295 | Pos: []string{identSelection2pos1}, 296 | Neg: []string{identSelection2neg1, identSelection2neg2}, 297 | Example: ident2, 298 | }, 299 | { 300 | IdentCount: 1, 301 | Rule: identSelection3, 302 | IdentTypes: []identType{identSelection}, 303 | Pos: []string{identSelection2pos1}, 304 | Neg: []string{identSelection2neg1, identSelection2neg2}, 305 | Example: ident2, 306 | }, 307 | } 308 | 309 | var keywordCases = []identTestCase{ 310 | { 311 | IdentCount: 1, 312 | Rule: identKeyword1, 313 | IdentTypes: []identType{identKeyword}, 314 | Pos: []string{identKeyword1pos1}, 315 | Neg: []string{identKeyword1neg1}, 316 | Example: ident1, 317 | }, 318 | { 319 | IdentCount: 1, 320 | Rule: identKeyword2, 321 | IdentTypes: []identType{identKeyword}, 322 | Pos: []string{identKeyword2pos1}, 323 | Neg: []string{identKeyword2neg1}, 324 | Example: ident1, 325 | }, 326 | { 327 | IdentCount: 1, 328 | Rule: identKeyword3, 329 | IdentTypes: []identType{identKeyword}, 330 | Pos: []string{identKeyword2pos1}, 331 | Neg: []string{identKeyword2neg1}, 332 | Example: ident1, 333 | }, 334 | } 335 | 336 | var identCases = append(keywordCases, selectionCases...) 
337 | 338 | func TestParseIdent(t *testing.T) { 339 | for i, c := range identCases { 340 | var r Rule 341 | if err := yaml.Unmarshal([]byte(c.Rule), &r); err != nil { 342 | t.Fatalf("ident case %d yaml parse fail: %s", i+1, err) 343 | } 344 | condition, ok := r.Detection["condition"].(string) 345 | if !ok { 346 | t.Fatalf("ident case %d missing condition", i+1) 347 | } 348 | l := lex(condition) 349 | var items, j int 350 | keywords := make([]Matcher, 0) 351 | selections := make([]Matcher, 0) 352 | for item := range l.items { 353 | switch item.T { 354 | case TokIdentifier: 355 | val, ok := r.Detection[item.Val] 356 | if !ok { 357 | t.Fatalf("ident case %d missing ident %s or unable to extract", i+1, item.Val) 358 | } 359 | items++ 360 | if k := checkIdentType(item.Val, val); k != c.IdentTypes[j] { 361 | t.Fatalf("ident case %d ident %d kind mismatch expected %s got %s", 362 | i+1, j+1, c.IdentTypes[j], k) 363 | } 364 | switch c.IdentTypes[j] { 365 | case identKeyword: 366 | kw, err := NewKeyword(val, false) 367 | if err != nil { 368 | t.Fatalf("ident case %d token %d failed to parse as keyword: %s", 369 | i+1, j+1, err) 370 | } 371 | keywords = append(keywords, kw) 372 | case identSelection: 373 | sel, err := NewSelectionBranch(val, false) 374 | if err != nil { 375 | t.Fatalf("ident case %d token %d failed to parse as selection: %s", 376 | i+1, j+1, err) 377 | } 378 | selections = append(selections, sel) 379 | } 380 | j++ 381 | } 382 | } 383 | if items != c.IdentCount { 384 | t.Fatalf("ident case %d defined element count %d does not match processd %d", 385 | i+1, c.IdentCount, items) 386 | } 387 | cases, err := c.sigma() 388 | if err != nil { 389 | t.Fatalf("ident case %d unable to cast test cases to sigma events, err: %s", 390 | i+1, err) 391 | } 392 | for _, rule := range keywords { 393 | if rule == nil { 394 | t.Fatalf("ident case %d nil rule pointer", i+1) 395 | } 396 | for j, c := range cases.Pos { 397 | m, _ := rule.Match(c) 398 | if !m { 399 | t.Fatalf("ident 
case %d positive test case %d did not match %s", 400 | i+1, j+1, c) 401 | } 402 | } 403 | for j, c := range cases.Neg { 404 | m, _ := rule.Match(c) 405 | if m { 406 | t.Fatalf("ident case %d negative test case %d did not match %s", 407 | i+1, j+1, c) 408 | } 409 | } 410 | } 411 | for _, rule := range selections { 412 | if rule == nil { 413 | t.Fatalf("ident case %d nil rule pointer", i+1) 414 | } 415 | for j, c := range cases.Pos { 416 | m, _ := rule.Match(c) 417 | if !m { 418 | t.Fatalf("ident case %d positive test case %d did not match %s", 419 | i+1, j+1, c) 420 | } 421 | } 422 | for j, c := range cases.Neg { 423 | m, _ := rule.Match(c) 424 | if m { 425 | t.Fatalf("ident case %d negative test case %d did not match %s", 426 | i+1, j+1, c) 427 | } 428 | } 429 | } 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /lexer.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "unicode" 7 | "unicode/utf8" 8 | ) 9 | 10 | type lexer struct { 11 | input string // we'll store the string being parsed 12 | start int // the position we started scanning 13 | position int // the current position of our scan 14 | width int // we'll be using runes which can be double byte 15 | items chan Item // the channel we'll use to communicate between the lexer and the parser 16 | } 17 | 18 | // lex creates a lexer and starts scanning the provided input. 19 | func lex(input string) *lexer { 20 | l := &lexer{ 21 | input: input, 22 | items: make(chan Item), // unbuffered 23 | } 24 | go l.scan() 25 | return l 26 | } 27 | 28 | // ignore resets the start position to the current scan position effectively 29 | // ignoring any input. 30 | func (l *lexer) ignore() { 31 | l.start = l.position 32 | } 33 | 34 | // next advances the lexer state to the next rune. 
35 | func (l *lexer) next() (r rune) { 36 | if l.position >= len(l.input) { 37 | l.width = 0 38 | return eof 39 | } 40 | 41 | r, l.width = utf8.DecodeRuneInString(l.todo()) 42 | l.position += l.width 43 | return r 44 | } 45 | 46 | // backup allows us to step back one rune which is helpful when you've crossed 47 | // a boundary from one state to another. 48 | func (l *lexer) backup() { 49 | l.position = l.position - 1 50 | } 51 | 52 | // scan will step through the provided text and execute state functions as 53 | // state changes are observed in the provided input. 54 | func (l *lexer) scan() { 55 | // When we begin processing, let's assume we're going to process text. 56 | // One state function will return another until `nil` is returned to signal 57 | // the end of our process. 58 | for fn := lexCondition; fn != nil; { 59 | fn = fn(l) 60 | } 61 | close(l.items) 62 | } 63 | 64 | func (l *lexer) unsuppf(format string, args ...interface{}) stateFn { 65 | msg := fmt.Sprintf(format, args...) 66 | l.items <- Item{T: TokUnsupp, Val: msg} 67 | return nil 68 | } 69 | 70 | func (l *lexer) errorf(format string, args ...interface{}) stateFn { 71 | msg := fmt.Sprintf(format, args...) 72 | l.items <- Item{T: TokErr, Val: msg} 73 | return nil 74 | } 75 | 76 | // emit sends a item over the channel so the parser can collect and manage 77 | // each segment. 78 | func (l *lexer) emit(k Token) { 79 | i := Item{T: k, Val: l.input[l.start:l.position]} 80 | l.items <- i 81 | l.ignore() // reset our scanner now that we've dispatched a segment 82 | } 83 | 84 | func (l lexer) collected() string { return l.input[l.start:l.position] } 85 | func (l lexer) todo() string { return l.input[l.position:] } 86 | 87 | // stateFn is a function that is specific to a state within the string. 88 | type stateFn func(*lexer) stateFn 89 | 90 | // lexCondition scans what is expected to be text. 
91 | func lexCondition(l *lexer) stateFn { 92 | for { 93 | if strings.HasPrefix(l.todo(), TokStOne.Literal()) { 94 | return lexOneOf 95 | } 96 | if strings.HasPrefix(l.todo(), TokStAll.Literal()) { 97 | return lexAllOf 98 | } 99 | switch r := l.next(); { 100 | case r == eof: 101 | return lexEOF 102 | case r == TokSepRpar.Rune(): 103 | return lexRparWithTokens 104 | case r == TokSepLpar.Rune(): 105 | return lexLpar 106 | case r == TokSepPipe.Rune(): 107 | return lexPipe 108 | case unicode.IsSpace(r): 109 | return lexAccumulateBeforeWhitespace 110 | } 111 | } 112 | } 113 | 114 | func lexStatement(l *lexer) stateFn { 115 | return lexCondition 116 | } 117 | 118 | func lexOneOf(l *lexer) stateFn { 119 | l.position += len(TokStOne.Literal()) 120 | l.emit(TokStOne) 121 | return lexCondition 122 | } 123 | 124 | func lexAllOf(l *lexer) stateFn { 125 | l.position += len(TokStAll.Literal()) 126 | l.emit(TokStAll) 127 | return lexCondition 128 | } 129 | 130 | func lexAggs(l *lexer) stateFn { 131 | return l.unsuppf("aggregation not supported yet [%s]", l.input) 132 | } 133 | 134 | func lexEOF(l *lexer) stateFn { 135 | if l.position > l.start { 136 | l.emit(checkKeyWord(l.collected())) 137 | } 138 | l.emit(TokLitEof) 139 | return nil 140 | } 141 | 142 | func lexPipe(l *lexer) stateFn { 143 | l.emit(TokSepPipe) 144 | return lexAggs 145 | } 146 | 147 | func lexLpar(l *lexer) stateFn { 148 | l.emit(TokSepLpar) 149 | return lexCondition 150 | } 151 | 152 | func lexRparWithTokens(l *lexer) stateFn { 153 | // emit any text we've accumulated. 
154 | if l.position > l.start { 155 | l.backup() 156 | // There may be N whitespace chars between token RPAR 157 | // TODO - may be a more concise way to do this, right now loops like this are everywhere 158 | 159 | if t := checkKeyWord(l.collected()); t != TokNil { 160 | l.emit(t) 161 | } 162 | 163 | for { 164 | switch r := l.next(); { 165 | case r == eof: 166 | return lexEOF 167 | case unicode.IsSpace(r): 168 | l.ignore() 169 | default: 170 | return lexRpar 171 | } 172 | } 173 | } 174 | return lexRpar 175 | } 176 | 177 | func lexRpar(l *lexer) stateFn { 178 | l.emit(TokSepRpar) 179 | return lexCondition 180 | } 181 | 182 | func lexAccumulateBeforeWhitespace(l *lexer) stateFn { 183 | l.backup() 184 | // emit any text we've accumulated. 185 | if l.position > l.start { 186 | l.emit(checkKeyWord(l.collected())) 187 | } 188 | return lexWhitespace 189 | } 190 | 191 | // lexWhitespace scans what is expected to be whitespace. 192 | func lexWhitespace(l *lexer) stateFn { 193 | for { 194 | switch r := l.next(); { 195 | case r == eof: 196 | return lexEOF 197 | case !unicode.IsSpace(r): 198 | l.backup() 199 | return lexCondition 200 | default: 201 | l.ignore() 202 | } 203 | } 204 | } 205 | 206 | func checkKeyWord(in string) Token { 207 | if len(in) == 0 { 208 | return TokNil 209 | } 210 | switch strings.ToLower(in) { 211 | case TokKeywordAnd.Literal(): 212 | return TokKeywordAnd 213 | case TokKeywordOr.Literal(): 214 | return TokKeywordOr 215 | case TokKeywordNot.Literal(): 216 | return TokKeywordNot 217 | case "sum", "min", "max", "count", "avg": 218 | return TokKeywordAgg 219 | case TokIdentifierAll.Literal(): 220 | return TokIdentifierAll 221 | case TokStOne.Literal(): 222 | return TokStOne 223 | default: 224 | if strings.Contains(in, "*") { 225 | return TokIdentifierWithWildcard 226 | } 227 | return TokIdentifier 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /lexer_test.go: 
-------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import "testing" 4 | 5 | type LexTestCase struct { 6 | Expr string 7 | Tokens []Token 8 | } 9 | 10 | var LexPosCases = []LexTestCase{ 11 | { 12 | Expr: "selection", 13 | Tokens: []Token{TokIdentifier, TokLitEof}, 14 | }, 15 | { 16 | Expr: "selection_1 and not filter_0", 17 | Tokens: []Token{ 18 | TokIdentifier, TokKeywordAnd, TokKeywordNot, TokIdentifier, TokLitEof, 19 | }, 20 | }, 21 | { 22 | Expr: "((selection_1 and not filter_0) OR (keyword_0 and not filter1)) or idontcare", 23 | Tokens: []Token{ 24 | TokSepLpar, TokSepLpar, TokIdentifier, TokKeywordAnd, TokKeywordNot, TokIdentifier, 25 | TokSepRpar, TokKeywordOr, TokSepLpar, TokIdentifier, TokKeywordAnd, TokKeywordNot, 26 | TokIdentifier, TokSepRpar, TokSepRpar, TokKeywordOr, TokIdentifier, TokLitEof, 27 | }, 28 | }, 29 | { 30 | Expr: "all of selection* and not 1 of filter* | count() > 10", 31 | Tokens: []Token{ 32 | TokStAll, TokIdentifierWithWildcard, TokKeywordAnd, TokKeywordNot, TokStOne, 33 | TokIdentifierWithWildcard, TokSepPipe, TokUnsupp, TokIdentifier, TokLitEof, 34 | }, 35 | }, 36 | } 37 | 38 | func TestLex(t *testing.T) { 39 | for j, c := range LexPosCases { 40 | l := lex(c.Expr) 41 | var i int 42 | for item := range l.items { 43 | if item.T != c.Tokens[i] { 44 | t.Fatalf( 45 | "lex case %d expr %s failed on item %d expected %s got %s", 46 | j, c.Expr, i, c.Tokens[i].String(), item.T.String()) 47 | } 48 | i++ 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /nodes.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | // NodeSimpleAnd is a list of matchers connected with logical conjunction 4 | type NodeSimpleAnd []Branch 5 | 6 | // Match implements Matcher 7 | func (n NodeSimpleAnd) Match(e Event) (bool, bool) { 8 | for _, b := range n { 9 | match, applicable := b.Match(e) 10 | if 
!match || !applicable { 11 | return match, applicable 12 | } 13 | } 14 | return true, true 15 | } 16 | 17 | // Reduce cleans up unneeded slices 18 | // Static structures can be used if node only holds one or two elements 19 | // Avoids pointless runtime loops 20 | func (n NodeSimpleAnd) Reduce() Branch { 21 | if len(n) == 1 { 22 | return n[0] 23 | } 24 | if len(n) == 2 { 25 | return &NodeAnd{L: n[0], R: n[1]} 26 | } 27 | return n 28 | } 29 | 30 | // NodeSimpleOr is a list of matchers connected with logical disjunction 31 | type NodeSimpleOr []Branch 32 | 33 | // Reduce cleans up unneeded slices 34 | // Static structures can be used if node only holds one or two elements 35 | // Avoids pointless runtime loops 36 | func (n NodeSimpleOr) Reduce() Branch { 37 | if len(n) == 1 { 38 | return n[0] 39 | } 40 | if len(n) == 2 { 41 | return &NodeOr{L: n[0], R: n[1]} 42 | } 43 | return n 44 | } 45 | 46 | // Match implements Matcher 47 | func (n NodeSimpleOr) Match(e Event) (bool, bool) { 48 | var oneApplicable bool 49 | for _, b := range n { 50 | match, applicable := b.Match(e) 51 | if match { 52 | return true, true 53 | } 54 | if applicable { 55 | oneApplicable = true 56 | } 57 | } 58 | return false, oneApplicable 59 | } 60 | 61 | // NodeNot negates a branch 62 | type NodeNot struct { 63 | B Branch 64 | } 65 | 66 | // Match implements Matcher 67 | func (n NodeNot) Match(e Event) (bool, bool) { 68 | match, applicable := n.B.Match(e) 69 | if !applicable { 70 | return match, applicable 71 | } 72 | return !match, applicable 73 | } 74 | 75 | // NodeAnd is a two element node of a binary tree with Left and Right branches 76 | // connected via logical conjunction 77 | type NodeAnd struct { 78 | L, R Branch 79 | } 80 | 81 | // Match implements Matcher 82 | func (n NodeAnd) Match(e Event) (bool, bool) { 83 | lMatch, lApplicable := n.L.Match(e) 84 | if !lMatch { 85 | return false, lApplicable 86 | } 87 | rMatch, rApplicable := n.R.Match(e) 88 | return lMatch && rMatch, lApplicable && 
rApplicable 89 | } 90 | 91 | // NodeOr is a two element node of a binary tree with Left and Right branches 92 | // connected via logical disjunction 93 | type NodeOr struct { 94 | L, R Branch 95 | } 96 | 97 | // Match implements Matcher 98 | func (n NodeOr) Match(e Event) (bool, bool) { 99 | lMatch, lApplicable := n.L.Match(e) 100 | if lMatch { 101 | return true, lApplicable 102 | } 103 | rMatch, rApplicable := n.R.Match(e) 104 | return lMatch || rMatch, lApplicable || rApplicable 105 | } 106 | 107 | func newNodeNotIfNegated(b Branch, negated bool) Branch { 108 | if negated { 109 | return &NodeNot{B: b} 110 | } 111 | return b 112 | } 113 | 114 | // TODO - use these functions to create binary trees instead of dunamic slices 115 | func newConjunction(s NodeSimpleAnd) Branch { 116 | if l := len(s); l == 1 || l == 2 { 117 | return s.Reduce() 118 | } 119 | return &NodeAnd{ 120 | L: s[0], 121 | R: newConjunction(s[1:]), 122 | } 123 | } 124 | 125 | func newDisjunction(s NodeSimpleOr) Branch { 126 | if l := len(s); l == 1 || l == 2 { 127 | return s.Reduce() 128 | } 129 | return &NodeOr{ 130 | L: s[0], 131 | R: newDisjunction(s[1:]), 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type parser struct { 8 | // lexer that tokenizes input string 9 | lex *lexer 10 | 11 | // container for collected tokens and their values 12 | tokens []Item 13 | 14 | // memorize last token to validate proper sequence 15 | // for example, two identifiers have to be joined via logical AND or OR, otherwise the sequence is invalid 16 | previous Item 17 | 18 | // sigma detection map that contains condition query and relevant fields 19 | sigma Detection 20 | 21 | // for debug 22 | condition string 23 | 24 | // resulting rule that can be collected later 25 | result Branch 26 | 27 | // if true, stops 
the parser from collapsing whitespace in non-regex rules (default is false to collapse) 28 | // and the data that will be matched against them; default is to collapse whitespace to allow for better 29 | // matching in the event that a bad actor attempts to pad whitespace inot a command to fool the engine 30 | noCollapseWS bool 31 | } 32 | 33 | func (p *parser) run() error { 34 | if p.lex == nil { 35 | return fmt.Errorf("cannot run condition parser, lexer not initialized") 36 | } 37 | // Pass 1: collect tokens, do basic sequence validation and collect sigma fields 38 | if err := p.collect(); err != nil { 39 | return err 40 | } 41 | // Pass 2: find groups 42 | if err := p.parse(); err != nil { 43 | return err 44 | } 45 | return nil 46 | } 47 | 48 | func (p *parser) parse() error { 49 | res, err := newBranch(p.sigma, p.tokens, 0, p.noCollapseWS) 50 | if err != nil { 51 | return err 52 | } 53 | p.result = res 54 | return nil 55 | } 56 | 57 | // collect gathers all items from lexer and does preliminary sequence validation 58 | func (p *parser) collect() error { 59 | for item := range p.lex.items { 60 | if item.T == TokUnsupp { 61 | return ErrUnsupportedToken{Msg: item.Val} 62 | } 63 | if p.previous.T != TokBegin && !validTokenSequence(p.previous.T, item.T) { 64 | return ErrInvalidTokenSeq{ 65 | Prev: p.previous, 66 | Next: item, 67 | Collected: p.tokens, 68 | } 69 | } 70 | if item.T != TokLitEof { 71 | p.tokens = append(p.tokens, item) 72 | } 73 | p.previous = item 74 | } 75 | if p.previous.T != TokLitEof { 76 | return ErrIncompleteTokenSeq{ 77 | Expression: p.condition, 78 | Items: p.tokens, 79 | Last: p.previous, 80 | } 81 | } 82 | return nil 83 | } 84 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/gobwas/glob" 7 | "gopkg.in/yaml.v2" 8 | ) 9 | 10 | var detection1 = ` 
11 | detection: 12 | condition: "selection1 and not selection3" 13 | selection1: 14 | Image: 15 | - '*\schtasks.exe' 16 | - '*\nslookup.exe' 17 | - '*\certutil.exe' 18 | - '*\bitsadmin.exe' 19 | - '*\mshta.exe' 20 | ParentImage: 21 | - '*\mshta.exe' 22 | - '*\powershell.exe' 23 | - '*\cmd.exe' 24 | - '*\rundll32.exe' 25 | - '*\cscript.exe' 26 | - '*\wscript.exe' 27 | - '*\wmiprvse.exe' 28 | selection3: 29 | CommandLine: "+R +H +S +A *.cui" 30 | ` 31 | 32 | var detection1_positive = ` 33 | { 34 | "Image": "C:\\test\\bitsadmin.exe", 35 | "CommandLine": "+R +H +A asd.cui", 36 | "ParentImage": "C:\\test\\wmiprvse.exe", 37 | "Image": "C:\\test\\bitsadmin.exe", 38 | "CommandLine": "aaa", 39 | "ParentImage": "C:\\test\\wmiprvse.exe" 40 | } 41 | ` 42 | 43 | var detection1_negative1 = ` 44 | { 45 | "Image": "C:\\test\\bitsadmin.exe", 46 | "CommandLine": "+R +H +S +A lll.cui", 47 | "ParentImage": "C:\\test\\mshta.exe" 48 | } 49 | ` 50 | 51 | var detection1_negative2 = ` 52 | { 53 | "Image": "C:\\test\\bitsadmin.exe", 54 | "CommandLine": "+R +H +S +A lll.cui" 55 | } 56 | ` 57 | 58 | var detection2 = ` 59 | detection: 60 | condition: "(selection1 and selection2) and not selection3" 61 | selection1: 62 | Image: 63 | - '*\schtasks.exe' 64 | - '*\nslookup.exe' 65 | - '*\certutil.exe' 66 | - '*\bitsadmin.exe' 67 | - '*\mshta.exe' 68 | selection2: 69 | ParentImage: 70 | - '*\mshta.exe' 71 | - '*\powershell.exe' 72 | - '*\cmd.exe' 73 | - '*\rundll32.exe' 74 | - '*\cscript.exe' 75 | - '*\wscript.exe' 76 | - '*\wmiprvse.exe' 77 | selection3: 78 | CommandLine: "+R +H +S +A *.cui" 79 | ` 80 | 81 | var detection3 = ` 82 | detection: 83 | condition: "(selection1 or selection2) and not selection3" 84 | selection1: 85 | Image: 86 | - '*\schtasks.exe' 87 | - '*\nslookup.exe' 88 | - '*\certutil.exe' 89 | - '*\bitsadmin.exe' 90 | - '*\mshta.exe' 91 | selection2: 92 | ParentImage: 93 | - '*\mshta.exe' 94 | - '*\powershell.exe' 95 | - '*\cmd.exe' 96 | - '*\rundll32.exe' 97 | - '*\cscript.exe' 98 
| - '*\wscript.exe' 99 | - '*\wmiprvse.exe' 100 | selection3: 101 | CommandLine: "+R +H +S +A *.cui" 102 | ` 103 | 104 | var detection3_positive1 = ` 105 | { 106 | "Image": "C:\\test\\bitsadmin.exe", 107 | "CommandLine": "+R +H +A asd.cui", 108 | "ParentImage": "C:\\test\\custom.exe", 109 | "Image": "C:\\test\\bitsadmin.exe", 110 | "CommandLine": "aaa", 111 | "ParentImage": "C:\\test\\wmiprvse.exe" 112 | } 113 | ` 114 | 115 | var detection3_positive2 = ` 116 | { 117 | "Image": "C:\\test\\custom.exe", 118 | "CommandLine": "+R +H +A asd.cui", 119 | "ParentImage": "C:\\test\\wmiprvse.exe", 120 | "Image": "C:\\test\\bitsadmin.exe", 121 | "CommandLine": "aaa", 122 | "ParentImage": "C:\\test\\wmiprvse.exe" 123 | } 124 | ` 125 | 126 | var detection3_negative = ` 127 | { 128 | "Image": "C:\\test\\bitsadmin.exe", 129 | "CommandLine": "+R +H +S +A lll.cui", 130 | "ParentImage": "C:\\test\\mshta.exe" 131 | } 132 | ` 133 | 134 | var detection4 = ` 135 | detection: 136 | condition: "all of selection* and not filter" 137 | selection1: 138 | Image: 139 | - '*\schtasks.exe' 140 | - '*\nslookup.exe' 141 | - '*\certutil.exe' 142 | - '*\bitsadmin.exe' 143 | - '*\mshta.exe' 144 | selection2: 145 | ParentImage: 146 | - '*\mshta.exe' 147 | - '*\powershell.exe' 148 | - '*\cmd.exe' 149 | - '*\rundll32.exe' 150 | - '*\cscript.exe' 151 | - '*\wscript.exe' 152 | - '*\wmiprvse.exe' 153 | filter: 154 | CommandLine: "+R +H +S +A *.cui" 155 | ` 156 | 157 | var detection5 = ` 158 | detection: 159 | condition: "1 of selection* and not filter" 160 | selection1: 161 | Image: 162 | - '*\schtasks.exe' 163 | - '*\nslookup.exe' 164 | - '*\certutil.exe' 165 | - '*\bitsadmin.exe' 166 | - '*\mshta.exe' 167 | selection2: 168 | ParentImage: 169 | - '*\mshta.exe' 170 | - '*\powershell.exe' 171 | - '*\cmd.exe' 172 | - '*\rundll32.exe' 173 | - '*\cscript.exe' 174 | - '*\wscript.exe' 175 | - '*\wmiprvse.exe' 176 | filter: 177 | CommandLine: "+R +H +S +A *.cui" 178 | ` 179 | 180 | var detection6 = ` 181 | 
detection: 182 | condition: "all of them" 183 | selection_images: 184 | Image: 185 | - '*\schtasks.exe' 186 | - '*\nslookup.exe' 187 | - '*\certutil.exe' 188 | - '*\bitsadmin.exe' 189 | - '*\mshta.exe' 190 | selection_parent_images: 191 | ParentImage: 192 | - '*\mshta.exe' 193 | - '*\powershell.exe' 194 | - '*\cmd.exe' 195 | - '*\rundll32.exe' 196 | - '*\cscript.exe' 197 | - '*\wscript.exe' 198 | - '*\wmiprvse.exe' 199 | ` 200 | 201 | var detection6_positive = ` 202 | { 203 | "Image": "C:\\test\\bitsadmin.exe", 204 | "CommandLine": "+R +H +A asd.cui", 205 | "ParentImage": "C:\\test\\wmiprvse.exe", 206 | "Image": "C:\\test\\bitsadmin.exe", 207 | "CommandLine": "aaa", 208 | "ParentImage": "C:\\test\\wmiprvse.exe" 209 | } 210 | ` 211 | 212 | var detection6_negative = ` 213 | { 214 | "Image": "C:\\test\\bitsadmin.exe", 215 | "CommandLine": "+R +H +S +A lll.cui", 216 | "ParentImage": "C:\\test\\mshta\\lll.exe" 217 | } 218 | ` 219 | 220 | var detection7 = ` 221 | detection: 222 | condition: "1 of them" 223 | selection_images: 224 | Image: 225 | - '*\schtasks.exe' 226 | - '*\nslookup.exe' 227 | - '*\certutil.exe' 228 | - '*\bitsadmin.exe' 229 | - '*\mshta.exe' 230 | selection_parent_images: 231 | ParentImage: 232 | - '*\mshta.exe' 233 | - '*\powershell.exe' 234 | - '*\cmd.exe' 235 | - '*\rundll32.exe' 236 | - '*\cscript.exe' 237 | - '*\wscript.exe' 238 | - '*\wmiprvse.exe' 239 | ` 240 | 241 | var detection7_negative1 = ` 242 | { 243 | "Image": "C:\\test\\bytesadmin.exe", 244 | "CommandLine": "+R +H +S +A lll.cui", 245 | "ParentImage": "E:\\go\\bin\\gofmt" 246 | } 247 | ` 248 | 249 | var detection7_negative2 = ` 250 | { 251 | "Image": "C:\\test\\bytesadmin.exe", 252 | "CommandLine": "+R +H +S +A lll.cui" 253 | } 254 | ` 255 | 256 | var detection8 = ` 257 | detection: 258 | condition: "selection1 and not selection3" 259 | selection1: 260 | Image: 261 | - '*\schtasks.exe' 262 | - '*\nslookup.exe' 263 | - '*\certutil.exe' 264 | - '*\bitsadmin.exe' 265 | - '*\mshta.exe' 266 | 
ParentImage: 267 | - '*\mshta.exe' 268 | - '*\powershell.exe' 269 | - '*\cmd.exe' 270 | - '*\rundll32.exe' 271 | - '*\cscript.exe' 272 | - '*\wscript.exe' 273 | - '*\wmiprvse.exe' 274 | selection3: 275 | CommandLine: "+R +H +S +A *.cui" 276 | ` 277 | 278 | var detection8_positive = ` 279 | { 280 | "Image": "C:\\test\\bitsadmin.exe", 281 | "CommandLine": "+R +H +A asd.cui", 282 | "ParentImage": "C:\\test\\wmiprvse.exe", 283 | "Image": "C:\\test\\bitsadmin.exe", 284 | "CommandLine": "aaa", 285 | "ParentImage": "C:\\test\\wmiprvse.exe" 286 | } 287 | ` 288 | 289 | var detection8_negative1 = ` 290 | { 291 | "Image": "C:\\test\\bitsadmin.exe", 292 | "CommandLine": "+R +H +S +A lll.cui", 293 | "ParentImage": "C:\\test\\mshta.exe" 294 | } 295 | ` 296 | 297 | var detection8_negative2 = ` 298 | { 299 | "Image": "C:\\test\\bitsadmin.exe", 300 | "ParentImage": "C:\\test\\mshta.exe" 301 | } 302 | ` 303 | 304 | var detection9 = ` 305 | detection: 306 | condition: "selection" 307 | selection: 308 | - PipeName|re: '\\\\SomePipeName[0-9a-f]{2}' 309 | - PipeName2|re: '\\\\AnotherPipe[0-9a-f]*Name' 310 | ` 311 | 312 | var detection9_positive = ` 313 | { 314 | "PipeName": "\\\\SomePipeNamea4", 315 | "PipeName2": "\\\\AnotherPipe0af3Name" 316 | } 317 | ` 318 | 319 | var detection9_negative = ` 320 | { 321 | "PipeName": "\\\\SomePipeNameZZ", 322 | "PipeName2": "\\\\AnotherPipe01zzName" 323 | } 324 | ` 325 | 326 | var detection10 = ` 327 | detection: 328 | condition: "selection1 and selection2" 329 | selection1: 330 | - SomeName|startswith: 'TestStart' 331 | selection2: 332 | - SomeName|endswith: 'TestEnd' 333 | ` 334 | 335 | var detection10_positive = ` 336 | { 337 | "SomeName": "TestStart-Value-TestEnd" 338 | } 339 | ` 340 | 341 | var detection10_negative = ` 342 | { 343 | "SomeName": "TestStart-Value" 344 | } 345 | ` 346 | 347 | var detection11 = ` 348 | detection: 349 | condition: "selection1 and selection2" 350 | selection1: 351 | SomeName|contains|all: 352 | - 'mark1' 353 | - 
'mark2' 354 | selection2: 355 | SomeName|contains: 356 | - 'version1' 357 | - 'version2' 358 | ` 359 | 360 | var detection11_positive = ` 361 | { 362 | "SomeName": "Some mark1 mark2 String version2" 363 | } 364 | ` 365 | 366 | var detection11_negative = ` 367 | { 368 | "SomeName": "mark1 mark2 mark3 non-matching string" 369 | } 370 | ` 371 | 372 | var detection12 = ` 373 | detection: 374 | condition: "selection1 and selection2" 375 | selection1: 376 | SomeKey|contains|all: 377 | - 'val1' 378 | - 'val2' 379 | selection2: 380 | SomeKey2: 381 | - 'mustMatch1' 382 | - 'mustMatch2' 383 | ` 384 | 385 | var detection12_positive = ` 386 | { 387 | "SomeKey": "val1 val2", 388 | "SomeKey2": "mustMatch1" 389 | } 390 | ` 391 | 392 | var detection12_negative = ` 393 | { 394 | "SomeKey": "val1 val2", 395 | "SomeKey2": "mustMatch3" 396 | } 397 | ` 398 | 399 | // this test is a bit tricky: 400 | // the '*\bits\*admin.exe' is looking to match '[wildCard]\bits*admin.exe' (one wildcard at head, one escaped wildcard) 401 | // the '\\\\DoubleBackslash\\some*.exe' is looking to match '\\DoubleBackslash\some[wildCard].exe' (multiple backslashes, one wildcard) 402 | // the '\leadingBackslash\\*.exe' is looking to match '\leadingBackslash\[wildCard].exe' (one wildcard and leading backslash) 403 | // the 'full\\\*plaintext.exe' is looking to match 'full\*plaintext.exe' (no wildcards exact match) 404 | var detection13 = ` 405 | detection: 406 | condition: "all of them" 407 | selection_images: 408 | Image: 409 | - '*\bits\*admin.exe' 410 | - '\\\\DoubleBackslash\\some*.exe' 411 | - '[Windows-*]\image.???' 
412 | selection_parent_images: 413 | ParentImage: 414 | - '\leadingBackslash\\*.exe' 415 | - 'full\\\*plaintext.exe' 416 | - '{000-aaa-*}\\*.exe' 417 | ` 418 | 419 | var detection13_positive = ` 420 | { 421 | "Image": "C:\\test\\bits*admin.exe", 422 | "ParentImage": "\\leadingBackslash\\something.exe" 423 | } 424 | ` 425 | 426 | var detection13_positive2 = ` 427 | { 428 | "Image": "\\\\DoubleBackslash\\someOther.exe", 429 | "ParentImage": "full\\*plaintext.exe" 430 | } 431 | ` 432 | 433 | var detection13_positive3 = ` 434 | { 435 | "Image": "C:\\test\\bits*admin.exe", 436 | "ParentImage": "full\\*plaintext.exe" 437 | } 438 | ` 439 | 440 | var detection13_positive4 = ` 441 | { 442 | "Image": "[Windows-Security]\\image.cmd", 443 | "ParentImage": "{000-aaa-123}\\evil.exe" 444 | } 445 | ` 446 | 447 | // won't match as Image is looking for '*\bits*admin.exe' witha leading wildcard and an escaped '*' between bits and admin 448 | // this provides 'C:\test\bitsadmin.exe', which matches the leading wildcard but fails to present the escaped '*' 449 | var detection13_negative = ` 450 | { 451 | "Image": "C:\\test\\bitsadmin.exe", 452 | "ParentImage": "\\leadingBackslash\\something.exe" 453 | } 454 | ` 455 | 456 | // won't match as the ParentImage is looking for '\leadingBackslash\*.exe' with a wildcard 457 | // this provides 'leadingBackslash\something.exe', missing the leading backslash 458 | var detection13_negative2 = ` 459 | { 460 | "Image": "C:\\test\\bits*admin.exe", 461 | "ParentImage": "leadingBackslash\\something.exe" 462 | } 463 | ` 464 | 465 | // won't match as the ParentImage is looking for an exact match (no wildcards) to 'full\*plaintext.exe' 466 | // this provides 'full\\*plaintext', the extra backslash kills it 467 | var detection13_negative3 = ` 468 | { 469 | "Image": "C:\\test\\bits*admin.exe", 470 | "ParentImage": "full\\\\*plaintext" 471 | } 472 | ` 473 | 474 | // shouldn't match on either of these (Image is missing 'Windows' in the bracket, ParentImage is 
missing the 475 | // a vaule of 000-aaa in the brackets) 476 | var detection13_negative4 = ` 477 | { 478 | "Image": "[-Security]\\image.cmd", 479 | "ParentImage": "{000-aaa}\\evil.exe" 480 | } 481 | ` 482 | 483 | // this has a hacky test; we set 'noCollapseWSNeg' in the parseTestCast struct for this case specifically 484 | // doing so will turn off collapsing the whitespace for the negative test and cause this to fail detection 485 | var detection14 = ` 486 | detection: 487 | condition: "selection" 488 | selection: 489 | SomeName|contains: 490 | - 'whitespace collapse testing' 491 | ` 492 | 493 | var detection14_case = ` 494 | { 495 | "SomeName": "whitespace\t\tcollapse testing" 496 | } 497 | ` 498 | 499 | var detection15 = ` 500 | detection: 501 | condition: "all of selection_* and 1 of option_*" 502 | selection_images: 503 | Image: 504 | - '*bits*admin.exe' 505 | selection_parent_images: 506 | ParentImage: 507 | - '*.exe' 508 | selection_bar: 509 | Baz: 510 | - '*bar*' 511 | option_1: 512 | Bar|contains: 513 | - 'Asdf' 514 | option_2: 515 | Test: 516 | - 123 517 | ` 518 | 519 | var detection15_positive1 = ` 520 | { 521 | "Image": "C:\\test\\bits\\aaa-admin.exe", 522 | "ParentImage": "\\leadingBackslash\\something.exe", 523 | "Baz": "foo bar baz", 524 | "Bar": "lalala Asdf [124]" 525 | } 526 | ` 527 | 528 | var detection15_negative1 = ` 529 | { 530 | "Image": "C:\\test\\bits\\aaa-admin.exe", 531 | "ParentImage": "\\leadingBackslash\\something.exe", 532 | "Baz": "foo bar baz", 533 | "Bar": "lalala Asd [124]" 534 | } 535 | ` 536 | 537 | var detection15_negative2 = ` 538 | { 539 | "Image": "C:\\test\\bits\\aaa-admin.exe", 540 | "ParentImage": "\\leadingBackslash\\something.exe", 541 | "Baz": "foo baz", 542 | "Bar": "lalala Asdf [124]" 543 | } 544 | ` 545 | 546 | var detection15_positive2 = ` 547 | { 548 | "Image": "C:\\test\\bits\\aaa-admin.exe", 549 | "ParentImage": "\\leadingBackslash\\something.exe", 550 | "Baz": "foo bar baz", 551 | "Test": 123 552 | } 553 | ` 
554 | 555 | var detection15_negative3 = ` 556 | { 557 | "Image": "C:\\test\\bits\\aaa-admin.exe", 558 | "ParentImage": "\\leadingBackslash\\something.exe", 559 | "Baz": "foo bar baz", 560 | "Test": 124 561 | } 562 | ` 563 | 564 | var detection15_negative4 = ` 565 | { 566 | "Image": "C:\\test\\bits\\aaa-admin.exe", 567 | "ParentImage": "\\leadingBackslash\\something.exe", 568 | "Baz": "foo baz", 569 | "Test": 123 570 | } 571 | ` 572 | 573 | type parseTestCase struct { 574 | ID int 575 | Rule string 576 | Pos, Neg []string 577 | noCollapseWSNeg bool 578 | } 579 | 580 | var parseTestCases = []parseTestCase{ 581 | { 582 | ID: 1, 583 | Rule: detection1, 584 | Pos: []string{detection1_positive}, 585 | Neg: []string{detection1_negative1, detection1_negative2}, 586 | }, 587 | { 588 | ID: 2, 589 | Rule: detection2, 590 | Pos: []string{detection1_positive}, 591 | Neg: []string{detection1_negative1, detection1_negative2}, 592 | }, 593 | { 594 | ID: 3, 595 | Rule: detection3, 596 | Pos: []string{detection3_positive1, detection3_positive2}, 597 | Neg: []string{detection3_negative}, 598 | }, 599 | { 600 | ID: 4, 601 | Rule: detection4, 602 | Pos: []string{detection1_positive}, 603 | Neg: []string{detection1_negative1, detection1_negative2}, 604 | }, 605 | { 606 | ID: 5, 607 | Rule: detection5, 608 | Pos: []string{detection3_positive1, detection3_positive2}, 609 | Neg: []string{detection3_negative}, 610 | }, 611 | { 612 | ID: 6, 613 | Rule: detection6, 614 | Pos: []string{detection6_positive}, 615 | Neg: []string{detection6_negative}, 616 | }, 617 | { 618 | ID: 7, 619 | Rule: detection7, 620 | Pos: []string{detection3_positive1, detection3_positive2}, 621 | Neg: []string{detection7_negative1, detection7_negative2}, 622 | }, 623 | { 624 | ID: 8, 625 | Rule: detection8, 626 | Pos: []string{detection8_positive}, 627 | Neg: []string{detection8_negative1, detection8_negative2}, 628 | }, 629 | { 630 | ID: 9, 631 | Rule: detection9, 632 | Pos: []string{detection9_positive}, 633 | Neg: 
[]string{detection9_negative}, 634 | }, 635 | { 636 | ID: 10, 637 | Rule: detection10, 638 | Pos: []string{detection10_positive}, 639 | Neg: []string{detection10_negative}, 640 | }, 641 | { 642 | ID: 11, 643 | Rule: detection11, 644 | Pos: []string{detection11_positive}, 645 | Neg: []string{detection11_negative}, 646 | }, 647 | { 648 | ID: 12, 649 | Rule: detection12, 650 | Pos: []string{detection12_positive}, 651 | Neg: []string{detection12_negative}, 652 | }, 653 | { 654 | ID: 13, 655 | Rule: detection13, 656 | Pos: []string{detection13_positive, detection13_positive2, detection13_positive3, detection13_positive4}, 657 | Neg: []string{detection13_negative, detection13_negative2, detection13_negative3, detection13_negative4}, 658 | }, 659 | { 660 | ID: 14, 661 | Rule: detection14, 662 | Pos: []string{detection14_case}, 663 | noCollapseWSNeg: false, // ensures whitespace is collapsed and everything matches 664 | }, 665 | { 666 | ID: 14, 667 | Rule: detection14, 668 | Neg: []string{detection14_case}, 669 | noCollapseWSNeg: true, // turns off whitespace collapsing and causing a non-match 670 | }, 671 | { 672 | ID: 15, 673 | Rule: detection15, 674 | Pos: []string{detection15_positive1, detection15_positive2}, 675 | Neg: []string{detection15_negative1, detection15_negative2, detection15_negative3, detection15_negative4}, 676 | }, 677 | } 678 | 679 | func TestTokenCollect(t *testing.T) { 680 | for _, c := range LexPosCases { 681 | p := &parser{ 682 | lex: lex(c.Expr), 683 | } 684 | if err := p.collect(); err != nil { 685 | switch err.(type) { 686 | case ErrUnsupportedToken: 687 | default: 688 | t.Fatal(err) 689 | } 690 | } 691 | } 692 | } 693 | 694 | func TestParse(t *testing.T) { 695 | for _, c := range parseTestCases { 696 | var rule Rule 697 | if err := yaml.Unmarshal([]byte(c.Rule), &rule); err != nil { 698 | t.Fatalf("rule parse case %d failed to unmarshal yaml, %s", c.ID, err) 699 | } 700 | expr := rule.Detection["condition"].(string) 701 | p := &parser{ 702 | 
lex: lex(expr), 703 | sigma: rule.Detection, 704 | noCollapseWS: c.noCollapseWSNeg, 705 | } 706 | if err := p.collect(); err != nil { 707 | t.Fatalf("rule parser case %d failed to collect lexical tokens, %s", c.ID, err) 708 | } 709 | if err := p.parse(); err != nil { 710 | switch err.(type) { 711 | case ErrWip: 712 | t.Fatalf("WIP") 713 | default: 714 | t.Fatalf("rule parser case %d failed to parse lexical tokens, %s", c.ID, err) 715 | } 716 | } 717 | } 718 | } 719 | 720 | func TestSigmaEscape(t *testing.T) { 721 | tests := []struct { 722 | name string 723 | input string 724 | expected string 725 | validMatch string 726 | skip bool 727 | }{ 728 | { 729 | name: "No_Change", 730 | input: `\\leadingBackslash\\*.exe`, 731 | expected: `\\leadingBackslash\\*.exe`, 732 | validMatch: `\leadingBackslash\testing.exe`, 733 | }, 734 | { 735 | name: "Leading_Single_Backslash_Wildcard_After_Slash", 736 | input: `\leadingBackslash\\*.exe`, 737 | expected: `\\leadingBackslash\\*.exe`, 738 | validMatch: `\leadingBackslash\testing.exe`, 739 | }, 740 | { 741 | name: "Leading_Wildcard_Single_Backslash_Esc_Wildcard", 742 | input: `*\bits\*admin.exe`, 743 | expected: `*\\bits\*admin.exe`, 744 | validMatch: `leading\bits*admin.exe`, 745 | }, 746 | { 747 | name: "Double_Leading_Backslash_Single_Backslash_Wildcard", 748 | input: `\\\\DoubleBackslash\some*.exe`, 749 | expected: `\\\\DoubleBackslash\\some*.exe`, 750 | validMatch: `\\DoubleBackslash\sometMatch.exe`, 751 | }, 752 | { 753 | name: "Plaintext_Only_Esc_Wildcard", 754 | input: `some\full\\\*plaintext.exe`, 755 | expected: `some\\full\\\*plaintext.exe`, 756 | validMatch: `some\full\*plaintext.exe`, 757 | }, 758 | { 759 | name: "Double_Leading_Backslash_Complex_Mix_Esc", 760 | input: `\\\\DoubleBackslash\?\some*Other\\*test.\\???`, 761 | expected: `\\\\DoubleBackslash\?\\some*Other\\*test.\\???`, 762 | validMatch: `\\DoubleBackslash?\someMixOther\wildcardtest.\cmd`, 763 | }, 764 | { 765 | name: 
"Mixed_Wildcards_Single_Backslash_Brackets", 766 | input: `[*]\*\aSetof\\\sigma{rule?}here*`, 767 | expected: `\[*\]\*\\aSetof\\\\sigma\{rule?\}here*`, 768 | validMatch: `[testing]*\aSetof\\sigma{rules}hereWeGo`, 769 | }, 770 | } 771 | for _, curTest := range tests { 772 | t.Run(curTest.name, func(t *testing.T) { 773 | if curTest.skip { 774 | t.Skip("test marked as skip") 775 | } 776 | 777 | escStr := escapeSigmaForGlob(curTest.input) 778 | if escStr != curTest.expected { 779 | t.Errorf("failed to validate escaped input; got: %s - expected: %s", escStr, curTest.expected) 780 | } 781 | 782 | // test as a glob to be sure 783 | globT, err := glob.Compile(escStr) 784 | if err != nil { 785 | t.Fatalf("failed to compile glob: %+v", err) 786 | } 787 | if !globT.Match(curTest.validMatch) { 788 | t.Errorf("compiled glob did NOT match valid input; glob: %s -- data: %s", escStr, curTest.validMatch) 789 | } 790 | }) 791 | } 792 | } 793 | -------------------------------------------------------------------------------- /pattern.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | 8 | "github.com/gobwas/glob" 9 | ) 10 | 11 | type TextPatternModifier int 12 | 13 | const ( 14 | TextPatternNone TextPatternModifier = iota 15 | TextPatternContains 16 | TextPatternPrefix 17 | TextPatternSuffix 18 | TextPatternAll 19 | TextPatternRegex 20 | TextPatternKeyword 21 | ) 22 | 23 | // func isValidSpecifier(in string) bool { 24 | // return in == "contains" || 25 | // in == "endswith" || 26 | // in == "startswith" 27 | // } 28 | 29 | // NumMatcher is an atomic pattern for numeric item or list of items 30 | type NumMatcher interface { 31 | // NumMatch implements NumMatcher 32 | NumMatch(int) bool 33 | } 34 | 35 | // NumMatchers holds multiple numeric matchers 36 | type NumMatchers []NumMatcher 37 | 38 | // NumMatch implements NumMatcher 39 | func (n NumMatchers) NumMatch(val int) bool { 40 
// NumMatch implements NumMatcher.
// Contained matchers are joined with a logical disjunction: the first
// matcher that accepts val wins.
	for _, v := range n {
		if v.NumMatch(val) {
			return true
		}
	}
	return false
}

// NewNumMatcher builds a NumMatcher from one or more integer patterns.
// A single pattern is returned as a bare NumPattern; multiple patterns are
// wrapped into NumMatchers (OR semantics). Zero patterns is an error.
func NewNumMatcher(patterns ...int) (NumMatcher, error) {
	if len(patterns) == 0 {
		return nil, fmt.Errorf("no patterns defined for matcher object")
	}
	matcher := make(NumMatchers, 0)
	for _, p := range patterns {
		matcher = append(matcher, NumPattern{Val: p})
	}

	// Collapse the single-pattern case to the bare matcher to avoid the
	// extra slice indirection on every match.
	return func() NumMatcher {
		if len(matcher) == 1 {
			return matcher[0]
		}
		return matcher
	}(), nil
}

// StringMatcher is an atomic pattern that could implement glob, literal or regex matchers
type StringMatcher interface {
	// StringMatch implements StringMatcher
	StringMatch(string) bool
}

// gWSCollapse matches any run of whitespace; used to fold runs into a single space.
var gWSCollapse = regexp.MustCompile(`\s+`)

// handleWhitespace takes str and, if the global configuration for collapsing
// whitespace is NOT turned off, returns the string with whitespace collapsed
// (1+ spaces, tabs, etc... become a single space); otherwise just returns the
// unmodified str. This only applies to non-regex rules and data hitting
// non-regex rules. See Config.NoCollapseWS (we collapse by default).
func handleWhitespace(str string, noCollapseWS bool) string {
	if noCollapseWS { // do we collapse whitespace or not?
		return str
	}
	return gWSCollapse.ReplaceAllString(str, " ")
}

// Byte values with special meaning in Sigma patterns and/or gobwas/glob patterns.
const (
	sigmaSpecialWildcard     = byte('*')  // matches any run of characters
	sigmaSpecialSingle       = byte('?')  // matches exactly one character
	sigmaSpecialEscape       = byte('\\') // escape prefix
	globSpecialSqrBrktLeft   = byte('[')  // glob character class; plain text in Sigma
	globSpecialSqrBrktRight  = byte(']')
	globSpecialCurlBrktLeft  = byte('{') // glob alternation group; plain text in Sigma
	globSpecialCurlBrktRight = byte('}')
)
// escapeSigmaForGlob translates Sigma value escaping into gobwas/glob
// escaping. Mostly the string passes through untouched; the work is in
// rebalancing lone escaped backslashes (Sigma '\' must become glob '\\',
// '\\\' must become '\\\\', and so on).
//
// Only bare runs of backslashes need rebalancing: once a run is followed by a
// special character ('?' or '*'), the escapes are already valid by convention
// (per Sigma, '\\*' is an escaped backslash plus a wildcard, while '\\\*' is
// an escaped backslash plus an escaped wildcard).
//
// Sigma escaping rules per the specification:
//   - A plain backslash not followed by a wildcard may be written '\' or '\\'.
//   - A wildcard is escaped to be treated as a plain character: '\*'.
//   - A backslash before a wildcard is escaped to mean backslash-then-wildcard: '\\*'.
//   - Three backslashes escape both backslash and wildcard as plain values: '\\\*'.
//   - Three or four backslashes yield a plain double backslash; four are recommended: '\\\\'.
func escapeSigmaForGlob(str string) string {
	if str == "" { // quick out if empty
		return ""
	}

	// Square/curly brackets are meta-characters in the glob library but plain
	// text in Sigma, so they always receive an escape prefix ("quotemeta"-like).
	isBracket := func(b byte) bool {
		return b == '[' || b == ']' || b == '{' || b == '}'
	}

	n := len(str)
	out := make([]byte, 2*n)
	w := 2*n - 1 // write cursor; the output is assembled back-to-front

	// inWildcard is set when scanning past '?' or '*' and cleared by any
	// character other than a backslash or wildcard; backslashes seen while it
	// is set are already conventionally escaped and are not counted.
	inWildcard := false
	// runLen counts consecutive backslashes seen outside wildcard mode so an
	// odd-length run can be balanced with one extra escape.
	runLen := 0
	for i := n - 1; i >= 0; i-- {
		switch str[i] {
		case '*', '?':
			inWildcard = true
		case '\\':
			if !inWildcard {
				runLen++
			}
		default:
			inWildcard = false
		}

		// The current character ends a backslash run: rebalance an odd run by
		// emitting one compensating escape, then reset the counter.
		if str[i] != '\\' && runLen > 0 {
			if runLen%2 != 0 {
				out[w] = '\\'
				w-- // extra character was written
			}
			runLen = 0
		}

		out[w] = str[i] // copy the current character
		w--

		// Brackets always get an escape prefix for the glob library.
		if isBracket(str[i]) {
			out[w] = '\\'
			w-- // extra character was written
		}
	}

	// A final check catches a backslash run that reaches the start of the input.
	if runLen%2 != 0 {
		out[w] = '\\'
	} else {
		w++ // no compensating slash added; step back onto the first written character
	}

	return string(out[w:])
}
// NewStringMatcher builds a StringMatcher for the given patterns, honoring the
// Sigma text modifier mod. lower requests case-insensitive comparison for
// literal patterns, all joins the resulting matchers with conjunction instead
// of disjunction, and noCollapseWS disables whitespace collapsing (see
// handleWhitespace). An empty pattern list is an error.
func NewStringMatcher(
	mod TextPatternModifier,
	lower, all, noCollapseWS bool,
	patterns ...string,
) (StringMatcher, error) {
	if len(patterns) == 0 {
		return nil, fmt.Errorf("no patterns defined for matcher object")
	}
	matcher := make([]StringMatcher, 0)
	for _, p := range patterns {
		// process modifiers first
		switch mod {
		case TextPatternRegex: // regex per spec
			re, err := regexp.Compile(p)
			if err != nil {
				return nil, err
			}
			matcher = append(matcher, RegexPattern{Re: re})
		case TextPatternContains: // contains: puts * wildcards around the values, such that the value is matched anywhere in the field
			p = handleWhitespace(p, noCollapseWS)
			// ensure single backslashes etc. are escaped correctly before surrounding with wildcards
			p = escapeSigmaForGlob(p)
			p = "*" + p + "*"
			globNG, err := glob.Compile(p)
			if err != nil {
				return nil, err
			}
			matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
		case TextPatternSuffix:
			// endswith: literal suffix comparison (no glob translation here)
			p = handleWhitespace(p, noCollapseWS)
			matcher = append(matcher, SuffixPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
		case TextPatternPrefix:
			// startswith: literal prefix comparison (no glob translation here)
			p = handleWhitespace(p, noCollapseWS)
			matcher = append(matcher, PrefixPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
		default:
			// no (supported) modifiers; handle non-spec regex, globs and regular values
			if strings.HasPrefix(p, "/") && strings.HasSuffix(p, "/") {
				// a value wrapped in slashes is treated as a regular expression
				re, err := regexp.Compile(strings.TrimLeft(strings.TrimRight(p, "/"), "/"))
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, RegexPattern{Re: re})
			} else if mod == TextPatternKeyword {
				// a bit hacky: if the pattern is a keyword and did not appear to
				// be a regex, always process it as a 'contains'-style glob (may
				// appear anywhere), since keywords typically hit free-form
				// message strings with surrounding detail
				p = handleWhitespace(p, noCollapseWS)
				// ensure single backslashes etc. are escaped correctly before surrounding with wildcards
				p = escapeSigmaForGlob(p)
				p = "*" + p + "*"
				globNG, err := glob.Compile(p)
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
			} else if strings.Contains(p, "*") {
				p = handleWhitespace(p, noCollapseWS)
				// Do NOT call QuoteMeta here as we're assuming the author knows what they're doing...
				p = escapeSigmaForGlob(p)
				globNG, err := glob.Compile(p)
				if err != nil {
					return nil, err
				}
				matcher = append(matcher, GlobPattern{Glob: &globNG, NoCollapseWS: noCollapseWS})
			} else {
				// plain value: exact literal comparison
				p = handleWhitespace(p, noCollapseWS)
				matcher = append(matcher, ContentPattern{Token: p, Lowercase: lower, NoCollapseWS: noCollapseWS})
			}
		}
	}
	// single matcher is returned bare; multiple matchers are joined with
	// conjunction (all) or disjunction, then reordered for speed
	return func() StringMatcher {
		if len(matcher) == 1 {
			return matcher[0]
		}
		if all {
			return StringMatchersConj(matcher).Optimize()
		}
		return StringMatchers(matcher).Optimize()
	}(), nil
}

// StringMatchers holds multiple atomic matchers.
// Patterns are meant to be a list of possibilities, thus objects are joined
// with logical disjunctions.
type StringMatchers []StringMatcher

// StringMatch implements StringMatcher; the first contained matcher to accept
// msg wins.
func (s StringMatchers) StringMatch(msg string) bool {
	for _, m := range s {
		// I thought about a type assertion here for handling whitespace;
		// however, as we're dealing with non-pointer types, that may cause
		// some added overhead we can avoid by implementing it where needed
		if m.StringMatch(msg) {
			return true
		}
	}
	return false
}
| if m.StringMatch(msg) { 262 | return true 263 | } 264 | } 265 | return false 266 | } 267 | 268 | // Optimize creates a new StringMatchers slice ordered by matcher type 269 | // First match wins, thus we can optimize by making sure fast string patterns 270 | // are executed first, then globs, and finally slow regular expressions 271 | func (s StringMatchers) Optimize() StringMatchers { 272 | return optimizeStringMatchers(s) 273 | } 274 | 275 | // StringMatchersConj is similar to StringMatcher but elements are joined with 276 | // conjunction, i.e. all patterns must match 277 | // used to implement "all" specifier for selection types 278 | type StringMatchersConj []StringMatcher 279 | 280 | // StringMatch implements StringMatcher 281 | func (s StringMatchersConj) StringMatch(msg string) bool { 282 | for _, m := range s { 283 | if !m.StringMatch(msg) { 284 | return false 285 | } 286 | } 287 | return true 288 | } 289 | 290 | // Optimize creates a new StringMatchers slice ordered by matcher type 291 | // First match wins, thus we can optimize by making sure fast string patterns 292 | // are executed first, then globs, and finally slow regular expressions 293 | func (s StringMatchersConj) Optimize() StringMatchersConj { 294 | return optimizeStringMatchers(s) 295 | } 296 | 297 | func optimizeStringMatchers(s []StringMatcher) []StringMatcher { 298 | globs := make([]StringMatcher, 0) 299 | re := make([]StringMatcher, 0) 300 | literals := make([]StringMatcher, 0) 301 | for _, pat := range s { 302 | switch pat.(type) { 303 | case ContentPattern, PrefixPattern, SuffixPattern: 304 | literals = append(literals, pat) 305 | case GlobPattern: 306 | globs = append(globs, pat) 307 | case RegexPattern: 308 | re = append(re, pat) 309 | } 310 | } 311 | return append(literals, append(globs, re...)...) 
312 | } 313 | 314 | // ContentPattern is a token for literal content matching 315 | type ContentPattern struct { 316 | Token string 317 | Lowercase bool 318 | NoCollapseWS bool 319 | } 320 | 321 | // StringMatch implements StringMatcher 322 | func (c ContentPattern) StringMatch(msg string) bool { 323 | msg = handleWhitespace(msg, c.NoCollapseWS) 324 | return lowerCaseIfNeeded(msg, c.Lowercase) == lowerCaseIfNeeded(c.Token, c.Lowercase) 325 | } 326 | 327 | // PrefixPattern is a token for literal content matching 328 | type PrefixPattern struct { 329 | Token string 330 | Lowercase bool 331 | NoCollapseWS bool 332 | } 333 | 334 | // StringMatch implements StringMatcher 335 | func (c PrefixPattern) StringMatch(msg string) bool { 336 | msg = handleWhitespace(msg, c.NoCollapseWS) 337 | return strings.HasPrefix( 338 | lowerCaseIfNeeded(msg, c.Lowercase), 339 | lowerCaseIfNeeded(c.Token, c.Lowercase), 340 | ) 341 | } 342 | 343 | // SuffixPattern is a token for literal content matching 344 | type SuffixPattern struct { 345 | Token string 346 | Lowercase bool 347 | NoCollapseWS bool 348 | } 349 | 350 | // StringMatch implements StringMatcher 351 | func (c SuffixPattern) StringMatch(msg string) bool { 352 | msg = handleWhitespace(msg, c.NoCollapseWS) 353 | return strings.HasSuffix( 354 | lowerCaseIfNeeded(msg, c.Lowercase), 355 | lowerCaseIfNeeded(c.Token, c.Lowercase), 356 | ) 357 | } 358 | 359 | // RegexPattern is for matching messages with regular expresions 360 | type RegexPattern struct { 361 | Re *regexp.Regexp 362 | } 363 | 364 | // StringMatch implements StringMatcher 365 | func (r RegexPattern) StringMatch(msg string) bool { 366 | return r.Re.MatchString(msg) 367 | } 368 | 369 | // GlobPattern is similar to ContentPattern but allows for asterisk wildcards 370 | type GlobPattern struct { 371 | Glob *glob.Glob 372 | NoCollapseWS bool 373 | } 374 | 375 | // StringMatch implements StringMatcher 376 | func (g GlobPattern) StringMatch(msg string) bool { 377 | msg = 
// SimplePattern is a reference type to illustrate StringMatcher.
type SimplePattern struct {
	Token        string
	NoCollapseWS bool
}

// StringMatch implements StringMatcher via substring containment.
func (s SimplePattern) StringMatch(msg string) bool {
	msg = handleWhitespace(msg, s.NoCollapseWS)
	return strings.Contains(msg, s.Token)
}

// lowerCaseIfNeeded lower-cases str when lower is true, otherwise returns it
// unchanged.
func lowerCaseIfNeeded(str string, lower bool) string {
	if lower {
		return strings.ToLower(str)
	}
	return str
}

// NumPattern matches on numeric value.
type NumPattern struct {
	Val int
}

// NumMatch implements NumMatcher via exact equality.
func (n NumPattern) NumMatch(val int) bool {
	return n.Val == val
}

// --- rule.go ---

package sigma

import (
	"bytes"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"gopkg.in/yaml.v2"
)

// RuleHandle is a meta object containing all fields from raw yaml, but is
// enhanced to also hold debugging info from the tool, such as source file
// path, etc.
type RuleHandle struct {
	Rule

	Path         string `json:"path"`
	Multipart    bool   `json:"multipart"`
	NoCollapseWS bool   `json:"noCollapseWS"`
}

// Rule defines a raw rule conforming to the sigma rule specification
// https://github.com/Neo23x0/sigma/wiki/Specification
// Only meant to be used for parsing yaml that matches the Sigma rule definition.
type Rule struct {
	Author         string   `yaml:"author" json:"author"`
	Description    string   `yaml:"description" json:"description"`
	Falsepositives []string `yaml:"falsepositives" json:"falsepositives"`
	Fields         []string `yaml:"fields" json:"fields"`
	ID             string   `yaml:"id" json:"id"`
	Level          string   `yaml:"level" json:"level"`
	Title          string   `yaml:"title" json:"title"`
	Status         string   `yaml:"status" json:"status"`
	References     []string `yaml:"references" json:"references"`

	Logsource `yaml:"logsource" json:"logsource"`
	Detection `yaml:"detection" json:"detection"`
	Tags      `yaml:"tags" json:"tags"`
}
// HasTags returns true if the rule contains all provided tags, otherwise false.
func (r *Rule) HasTags(tags []string) bool {
	lookup := make(map[string]bool, len(r.Tags))
	for _, tag := range r.Tags {
		lookup[tag] = true
	}
	for _, tag := range tags {
		if _, ok := lookup[tag]; !ok {
			return false
		}
	}
	return true
}

// RuleFromYAML parses yaml data into a Rule object.
func RuleFromYAML(data []byte) (r Rule, err error) {
	err = yaml.Unmarshal(data, &r)
	return
}

// IsMultipart checks if a raw rule document is multipart, i.e. contains a
// "---" document separator somewhere other than the very beginning.
func IsMultipart(data []byte) bool {
	return !bytes.HasPrefix(data, []byte("---")) && bytes.Contains(data, []byte("---"))
}

// NewRuleList reads a list of sigma rule paths and parses them into rule
// objects. When skip is true, yaml parse failures are collected and reported
// in bulk instead of aborting; rules not carrying all of tags are dropped.
// noCollapseWS is forwarded to each RuleHandle.
func NewRuleList(files []string, skip, noCollapseWS bool, tags []string) ([]RuleHandle, error) {
	if len(files) == 0 {
		return nil, fmt.Errorf("missing rule file list")
	}
	errs := make([]ErrParseYaml, 0)
	rules := make([]RuleHandle, 0)
loop:
	for i, path := range files {
		data, err := os.ReadFile(path)
		if err != nil {
			return nil, err
		}
		r, err := RuleFromYAML(data)
		if err != nil {
			if skip {
				errs = append(errs, ErrParseYaml{
					Path:  path,
					Count: i,
					Err:   err,
				})
				continue loop
			}
			// NOTE(review): the skip branch collects ErrParseYaml by value
			// while this branch returns a pointer — confirm callers' type
			// switches handle both forms.
			return nil, &ErrParseYaml{Err: err, Path: path}
		}

		// drop rules that do not carry every requested tag
		if !r.HasTags(tags) {
			continue loop
		}

		rules = append(rules, RuleHandle{
			Path:         path,
			Rule:         r,
			NoCollapseWS: noCollapseWS,
			Multipart:    IsMultipart(data),
		})
	}
	// report collected parse failures in bulk, if any
	return rules, func() error {
		if len(errs) > 0 {
			return ErrBulkParseYaml{Errs: errs}
		}
		return nil
	}()
}
// Logsource represents the logsource field in a sigma rule.
// It defines relevant event streams and is used for pre-filtering.
type Logsource struct {
	Product    string `yaml:"product" json:"product"`
	Category   string `yaml:"category" json:"category"`
	Service    string `yaml:"service" json:"service"`
	Definition string `yaml:"definition" json:"definition"`
}

// Detection represents the detection field in a sigma rule.
// It contains the condition expression and identifier fields for building the AST.
type Detection map[string]interface{}

// Extract returns a copy of the detection map without the "condition" key,
// i.e. only the identifier definitions.
func (d Detection) Extract() map[string]interface{} {
	tx := make(map[string]interface{})
	for k, v := range d {
		if k != "condition" {
			tx[k] = v
		}
	}
	return tx
}

// Tags contains a metadata list for tying positive matches together with
// other threat intel sources, for example for attaching MITRE ATT&CK tactics
// or techniques to the event.
type Tags []string

// Result is an object returned on positive sigma match.
type Result struct {
	Tags `json:"tags"`

	ID          string `json:"id"`
	Title       string `json:"title"`
	Description string `json:"description"`
}

// Results should be returned when a single event matches multiple rules.
type Results []Result
:= range dirs { 161 | if err := filepath.Walk(dir, func( 162 | path string, 163 | info os.FileInfo, 164 | err error, 165 | ) error { 166 | if !info.IsDir() && strings.HasSuffix(path, "yml") { 167 | out = append(out, path) 168 | } 169 | return err 170 | }); err != nil { 171 | return out, err 172 | } 173 | } 174 | return out, nil 175 | } 176 | -------------------------------------------------------------------------------- /ruleset.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sync" 7 | ) 8 | 9 | // Config is used as argument to creating a new ruleset 10 | type Config struct { 11 | // root directory for recursive rule search 12 | // rules must be readable files with "yml" suffix 13 | Directory []string 14 | // by default, a rule parse fail will simply increment Ruleset.Failed counter when failing to 15 | // parse yaml or rule AST 16 | // this parameter will cause an early error return instead 17 | FailOnRuleParse, FailOnYamlParse bool 18 | // by default, we will collapse whitespace for both rules and data of non-regex rules and non-regex compared data 19 | // setthig this to true turns that behavior off 20 | NoCollapseWS bool 21 | } 22 | 23 | func (c Config) validate() error { 24 | if c.Directory == nil || len(c.Directory) == 0 { 25 | return fmt.Errorf("missing root directory for sigma rules") 26 | } 27 | for _, dir := range c.Directory { 28 | info, err := os.Stat(dir) 29 | if os.IsNotExist(err) { 30 | return fmt.Errorf("%s does not exist", dir) 31 | } 32 | if !info.IsDir() { 33 | return fmt.Errorf("%s is not a directory", dir) 34 | } 35 | } 36 | return nil 37 | } 38 | 39 | // Ruleset is a collection of rules 40 | type Ruleset struct { 41 | mu *sync.RWMutex 42 | 43 | Rules []*Tree 44 | root []string 45 | 46 | Total, Ok, Failed, Unsupported int 47 | } 48 | 49 | // NewRuleset instanciates a Ruleset object 50 | func NewRuleset(c Config, tags []string) (*Ruleset, error) { 51 
// Ruleset is a collection of parsed rule trees along with counters describing
// the outcome of loading them.
type Ruleset struct {
	mu *sync.RWMutex

	Rules []*Tree
	root  []string // configured root directories the rules were loaded from

	Total, Ok, Failed, Unsupported int
}

// NewRuleset instantiates a Ruleset object from the given Config, keeping
// only rules that carry all of tags. Yaml parse failures are tolerated
// (counted in Failed) unless Config.FailOnYamlParse is set.
func NewRuleset(c Config, tags []string) (*Ruleset, error) {
	if err := c.validate(); err != nil {
		return nil, err
	}
	files, err := NewRuleFileList(c.Directory)
	if err != nil {
		return nil, err
	}
	var fail int
	rules, err := NewRuleList(files, !c.FailOnYamlParse, c.NoCollapseWS, tags)
	if err != nil {
		switch e := err.(type) {
		case ErrBulkParseYaml:
			// tolerated bulk yaml failures; fold them into the counters
			fail += len(e.Errs)
		default:
			return nil, err
		}
	}
	result := RulesetFromRuleList(rules)
	result.root = c.Directory
	result.Failed += fail
	result.Total += fail
	return result, nil
}

// RulesetFromRuleList builds ASTs for each parsed rule, counting failures and
// unsupported rules (multipart rules and rules with unsupported tokens).
func RulesetFromRuleList(rules []RuleHandle) *Ruleset {
	var fail, unsupp int
	set := make([]*Tree, 0)
loop:
	for _, raw := range rules {
		// multipart rules are not supported by the tree builder
		if raw.Multipart {
			unsupp++
			continue loop
		}
		tree, err := NewTree(raw)
		if err != nil {
			// both value and pointer forms are matched because unsupported-token
			// errors may be produced either way
			switch err.(type) {
			case ErrUnsupportedToken, *ErrUnsupportedToken:
				unsupp++
			default:
				fail++
			}
			continue loop
		}
		set = append(set, tree)
	}
	return &Ruleset{
		mu:          &sync.RWMutex{},
		Rules:       set,
		Failed:      fail,
		Ok:          len(set),
		Unsupported: unsupp,
		Total:       len(rules),
	}
}

// EvalAll evaluates the event against every rule in the set and returns all
// positive results; the second return value reports whether anything matched.
func (r *Ruleset) EvalAll(e Event) (Results, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()
	results := make(Results, 0)
	for _, rule := range r.Rules {
		if res, match := rule.Eval(e); match {
			results = append(results, *res)
		}
	}
	if len(results) > 0 {
		return results, true
	}
	return nil, false
}
// Keyworder implements the keywords sigma rule type on an arbitrary event.
// Should return a list of fields that are relevant for rule matching.
type Keyworder interface {
	// Keywords implements Keyworder
	Keywords() ([]string, bool)
}

// Selector implements the selection sigma rule type.
type Selector interface {
	// Select implements Selector
	Select(string) (interface{}, bool)
}

// Event implements sigma rule types by embedding Keyworder and Selector.
// Used by rules to extract relevant fields.
type Event interface {
	Keyworder
	Selector
}

// Matcher is used for implementing the Abstract Syntax Tree for the Sigma engine.
type Matcher interface {
	// Match implements Matcher.
	// NOTE(review): the semantics of the second boolean are not visible in
	// this chunk — presumably "applicable/keywords present"; confirm against
	// the tree implementation.
	Match(Event) (bool, bool)
}

// Branch implements Matcher with additional methods for walking and debugging the tree.
type Branch interface {
	Matcher

	// Self returns Node or final rule object for debugging and/or walking the tree
	// Must be type switched externally
	// Self() interface{}
}

// --- token.go ---

package sigma

import (
	"context"

	"github.com/gobwas/glob"
)

// eof marks the end of lexer input.
var eof = rune(0)

// Item is a lexical token along with its respective plaintext value.
// Item is communicated between lexer and parser.
type Item struct {
	T            Token
	Val          string
	globVal      *glob.Glob // Do NOT access directly, use the Item.Glob() function instead
	globCompFail bool       // prevents us from trying to re-compile a failed globVal over and over...
}

// String returns the plaintext value of the token.
func (i Item) String() string { return i.Val }
23 | // Do NOT access globVal directly as it won't be compiled until the first call to Item.Glob() 24 | func (i *Item) Glob() *glob.Glob { 25 | if i.globVal == nil && !i.globCompFail { 26 | newVal := escapeSigmaForGlob(i.Val) 27 | newGlob, err := glob.Compile(newVal) 28 | if err != nil { 29 | i.globCompFail = true 30 | return nil 31 | } 32 | i.globVal = &newGlob 33 | } 34 | 35 | return i.globVal 36 | } 37 | 38 | func genItems(t []Item) <-chan Item { 39 | tx := make(chan Item) // unbuffered 40 | go func(ctx context.Context) { 41 | defer close(tx) 42 | for _, item := range t { 43 | tx <- item 44 | } 45 | }(context.TODO()) 46 | return tx 47 | } 48 | 49 | // Token is a lexical token extracted from condition field 50 | type Token int 51 | 52 | const ( 53 | TokBegin Token = iota 54 | 55 | // Helpers for internal stuff 56 | TokErr 57 | TokUnsupp 58 | TokNil 59 | 60 | // user-defined word 61 | TokIdentifier 62 | TokIdentifierWithWildcard 63 | TokIdentifierAll 64 | 65 | // Literals 66 | TokLitEof 67 | 68 | // Separators 69 | TokSepLpar 70 | TokSepRpar 71 | TokSepPipe 72 | 73 | // Operators 74 | TokOpEq 75 | TokOpGt 76 | TokOpGte 77 | TokOpLt 78 | TokOpLte 79 | 80 | // Keywords 81 | TokKeywordAnd 82 | TokKeywordOr 83 | TokKeywordNot 84 | TokKeywordAgg 85 | 86 | // TODO 87 | TokKeywordNear 88 | TokKeywordBy 89 | 90 | // Statements 91 | TokStOne 92 | TokStAll 93 | ) 94 | 95 | // String documents human readable textual value of token 96 | // For visual debugging, so symbols will be written out and everything is uppercased 97 | func (t Token) String() string { 98 | switch t { 99 | case TokIdentifier: 100 | return "IDENT" 101 | case TokIdentifierWithWildcard: 102 | return "WILDCARDIDENT" 103 | case TokIdentifierAll: 104 | return "THEM" 105 | case TokSepLpar: 106 | return "LPAR" 107 | case TokSepRpar: 108 | return "RPAR" 109 | case TokSepPipe: 110 | return "PIPE" 111 | case TokOpEq: 112 | return "EQ" 113 | case TokOpGt: 114 | return "GT" 115 | case TokOpGte: 116 | return "GTE" 117 
| case TokOpLt: 118 | return "LT" 119 | case TokOpLte: 120 | return "LTE" 121 | case TokKeywordAnd: 122 | return "AND" 123 | case TokKeywordOr: 124 | return "OR" 125 | case TokKeywordNot: 126 | return "NOT" 127 | case TokStAll: 128 | return "ALL" 129 | case TokStOne: 130 | return "ONE" 131 | case TokKeywordAgg: 132 | return "AGG" 133 | case TokLitEof: 134 | return "EOF" 135 | case TokErr: 136 | return "ERR" 137 | case TokUnsupp: 138 | return "UNSUPPORTED" 139 | case TokBegin: 140 | return "BEGINNING" 141 | case TokNil: 142 | return "NIL" 143 | default: 144 | return "Unk" 145 | } 146 | } 147 | 148 | // Literal documents plaintext values of a token 149 | // Uses special symbols and expressions, as used in a rule 150 | func (t Token) Literal() string { 151 | switch t { 152 | case TokIdentifier, TokIdentifierWithWildcard: 153 | return "keywords" 154 | case TokIdentifierAll: 155 | return "them" 156 | case TokSepLpar: 157 | return "(" 158 | case TokSepRpar: 159 | return ")" 160 | case TokSepPipe: 161 | return "|" 162 | case TokOpEq: 163 | return "=" 164 | case TokOpGt: 165 | return ">" 166 | case TokOpGte: 167 | return ">=" 168 | case TokOpLt: 169 | return "<" 170 | case TokOpLte: 171 | return "<=" 172 | case TokKeywordAnd: 173 | return "and" 174 | case TokKeywordOr: 175 | return "or" 176 | case TokKeywordNot: 177 | return "not" 178 | case TokStAll: 179 | return "all of" 180 | case TokStOne: 181 | return "1 of" 182 | case TokLitEof, TokNil: 183 | return "" 184 | default: 185 | return "Err" 186 | } 187 | } 188 | 189 | // Rune returns UTF-8 numeric value of symbol 190 | func (t Token) Rune() rune { 191 | switch t { 192 | case TokSepLpar: 193 | return '(' 194 | case TokSepRpar: 195 | return ')' 196 | case TokSepPipe: 197 | return '|' 198 | default: 199 | return eof 200 | } 201 | } 202 | 203 | // validTokenSequence detects invalid token sequences 204 | // not meant to be a perfect validator, simply a quick check before parsing 205 | func validTokenSequence(t1, t2 Token) bool 
{ 206 | switch t2 { 207 | case TokStAll, TokStOne: 208 | switch t1 { 209 | case TokBegin, TokSepLpar, TokKeywordAnd, TokKeywordOr, TokKeywordNot: 210 | return true 211 | } 212 | case TokIdentifierAll: 213 | switch t1 { 214 | case TokStAll, TokStOne: 215 | return true 216 | } 217 | case TokIdentifier, TokIdentifierWithWildcard: 218 | switch t1 { 219 | case TokSepLpar, TokBegin, TokKeywordAnd, TokKeywordOr, TokKeywordNot, TokStOne, TokStAll: 220 | return true 221 | } 222 | case TokKeywordAnd, TokKeywordOr: 223 | switch t1 { 224 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 225 | return true 226 | } 227 | case TokKeywordNot: 228 | switch t1 { 229 | case TokKeywordAnd, TokKeywordOr, TokSepLpar, TokBegin: 230 | return true 231 | } 232 | case TokSepLpar: 233 | switch t1 { 234 | case TokKeywordAnd, TokKeywordOr, TokKeywordNot, TokBegin, TokSepLpar: 235 | return true 236 | } 237 | case TokSepRpar: 238 | switch t1 { 239 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepLpar, TokSepRpar: 240 | return true 241 | } 242 | case TokLitEof: 243 | switch t1 { 244 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 245 | return true 246 | } 247 | case TokSepPipe: 248 | switch t1 { 249 | case TokIdentifier, TokIdentifierAll, TokIdentifierWithWildcard, TokSepRpar: 250 | return true 251 | } 252 | } 253 | return false 254 | } 255 | -------------------------------------------------------------------------------- /tree.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gobwas/glob" 7 | ) 8 | 9 | // Tree represents the full AST for a sigma rule 10 | type Tree struct { 11 | Root Branch 12 | Rule *RuleHandle 13 | } 14 | 15 | // Match implements Matcher 16 | func (t Tree) Match(e Event) (bool, bool) { 17 | return t.Root.Match(e) 18 | } 19 | 20 | func (t Tree) Eval(e Event) (*Result, bool) { 21 | match, applicable 
:= t.Match(e) 22 | if !applicable { 23 | return nil, false 24 | } 25 | if t.Rule == nil && match { 26 | return &Result{}, true 27 | } 28 | if match { 29 | return &Result{ 30 | ID: t.Rule.ID, 31 | Title: t.Rule.Title, 32 | Tags: t.Rule.Tags, 33 | Description: t.Rule.Description, 34 | }, true 35 | } 36 | return nil, false 37 | } 38 | 39 | // NewTree parses rule handle into an abstract syntax tree 40 | func NewTree(r RuleHandle) (*Tree, error) { 41 | if r.Detection == nil { 42 | return nil, ErrMissingDetection{} 43 | } 44 | expr, ok := r.Detection["condition"].(string) 45 | if !ok { 46 | return nil, ErrMissingCondition{} 47 | } 48 | 49 | p := &parser{ 50 | lex: lex(expr), 51 | condition: expr, 52 | sigma: r.Detection, 53 | noCollapseWS: r.NoCollapseWS, 54 | } 55 | if err := p.run(); err != nil { 56 | return nil, err 57 | } 58 | t := &Tree{ 59 | Root: p.result, 60 | Rule: &r, 61 | } 62 | return t, nil 63 | } 64 | 65 | // newBranch builds a binary tree from token list 66 | // sequence and group validation should be done before invoking newBranch 67 | func newBranch(d Detection, t []Item, depth int, noCollapseWS bool) (Branch, error) { 68 | rx := genItems(t) 69 | 70 | and := make(NodeSimpleAnd, 0) 71 | or := make(NodeSimpleOr, 0) 72 | var negated bool 73 | var wildcard Token 74 | 75 | for item := range rx { 76 | switch item.T { 77 | case TokIdentifier: 78 | val, ok := d[item.Val] 79 | if !ok { 80 | return nil, ErrMissingConditionItem{Key: item.Val} 81 | } 82 | b, err := newRuleFromIdent(val, checkIdentType(item.Val, val), noCollapseWS) 83 | if err != nil { 84 | return nil, err 85 | } 86 | and = append(and, newNodeNotIfNegated(b, negated)) 87 | negated = false 88 | case TokKeywordAnd: 89 | // no need to do anything special here 90 | case TokKeywordOr: 91 | // fill OR gate with collected AND nodes 92 | // reduce will strip AND logic if only one token has been collected 93 | or = append(or, and.Reduce()) 94 | // reset existing AND collector 95 | and = make(NodeSimpleAnd, 0) 
96 | case TokKeywordNot: 97 | negated = true 98 | case TokSepLpar: 99 | // recursively create new branch and append to existing list 100 | // then skip to next token after grouping 101 | b, err := newBranch(d, extractGroup(rx), depth+1, noCollapseWS) 102 | if err != nil { 103 | return nil, err 104 | } 105 | and = append(and, newNodeNotIfNegated(b, negated)) 106 | negated = false 107 | case TokIdentifierAll: 108 | switch wildcard { 109 | case TokStAll: 110 | rules, err := extractAllToRules(d, noCollapseWS) 111 | if err != nil { 112 | return nil, err 113 | } 114 | and = append(and, newNodeNotIfNegated(NodeSimpleAnd(rules), negated)) 115 | negated = false 116 | case TokStOne: 117 | rules, err := extractAllToRules(d, noCollapseWS) 118 | if err != nil { 119 | return nil, err 120 | } 121 | and = append(and, newNodeNotIfNegated(NodeSimpleOr(rules), negated)) 122 | negated = false 123 | default: 124 | return nil, fmt.Errorf("invalid wildcard ident, missing 1 of/ all of prefix") 125 | } 126 | case TokIdentifierWithWildcard: 127 | switch wildcard { 128 | case TokStAll: 129 | // build logical conjunction 130 | rules, err := extractAndBuildBranches(d, item.Glob(), noCollapseWS) 131 | if err != nil { 132 | return nil, fmt.Errorf("failed to extract and build branch for '%s': %s", item, err) 133 | } 134 | and = append(and, newNodeNotIfNegated(NodeSimpleAnd(rules), negated)) 135 | negated = false 136 | case TokStOne: 137 | // build logical disjunction 138 | rules, err := extractAndBuildBranches(d, item.Glob(), noCollapseWS) 139 | if err != nil { 140 | return nil, fmt.Errorf("failed to extract and build branch for '%s': %s", item, err) 141 | } 142 | and = append(and, newNodeNotIfNegated(NodeSimpleOr(rules), negated)) 143 | negated = false 144 | default: 145 | // invalid case, did not see 1of/allof statement before wildcard ident 146 | return nil, fmt.Errorf("invalid wildcard ident, missing 1 of/ all of prefix") 147 | } 148 | wildcard = TokBegin 149 | case TokStAll: 150 | wildcard = 
TokStAll 151 | case TokStOne: 152 | wildcard = TokStOne 153 | case TokSepRpar: 154 | return nil, fmt.Errorf("parser error, should not see %s", 155 | TokSepRpar) 156 | default: 157 | return nil, ErrUnsupportedToken{ 158 | Msg: fmt.Sprintf("%s | %s", item.T, item.T.Literal()), 159 | } 160 | } 161 | } 162 | or = append(or, newNodeNotIfNegated(and.Reduce(), negated)) 163 | 164 | return or.Reduce(), nil 165 | } 166 | 167 | func extractGroup(rx <-chan Item) []Item { 168 | // fn is called when newBranch hits TokSepLpar 169 | // it will be consumed, so balance is already 1 170 | balance := 1 171 | group := make([]Item, 0) 172 | for item := range rx { 173 | if balance > 0 { 174 | group = append(group, item) 175 | } 176 | switch item.T { 177 | case TokSepLpar: 178 | balance++ 179 | case TokSepRpar: 180 | balance-- 181 | if balance == 0 { 182 | return group[:len(group)-1] 183 | } 184 | default: 185 | } 186 | } 187 | return group 188 | } 189 | 190 | func extractAndBuildBranches(d Detection, g *glob.Glob, noCollapseWS bool) ([]Branch, error) { 191 | vals, err := extractWildcardIdents(d, g) 192 | if err != nil { 193 | return nil, err 194 | } 195 | rules := make(NodeSimpleAnd, len(vals)) 196 | for i, v := range vals { 197 | b, err := newRuleFromIdent(v, identSelection, noCollapseWS) 198 | if err != nil { 199 | return nil, err 200 | } 201 | rules[i] = b 202 | } 203 | return rules, nil 204 | } 205 | 206 | func extractWildcardIdents(d Detection, g *glob.Glob) ([]interface{}, error) { 207 | if g == nil { 208 | return nil, fmt.Errorf("passed glob was nil (failed to compile)") 209 | } 210 | rules := make([]interface{}, 0) 211 | for k, v := range d { 212 | if (*g).Match(k) { 213 | rules = append(rules, v) 214 | } 215 | } 216 | if len(rules) == 0 { 217 | return nil, fmt.Errorf("ident did not match any values") 218 | } 219 | return rules, nil 220 | } 221 | 222 | func extractAllToRules(d Detection, noCollapseWS bool) ([]Branch, error) { 223 | rules := make([]Branch, 0) 224 | for k, v := 
range d.Extract() { 225 | b, err := newRuleFromIdent(v, checkIdentType(k, v), noCollapseWS) 226 | if err != nil { 227 | return nil, err 228 | } 229 | rules = append(rules, b) 230 | } 231 | return rules, nil 232 | } 233 | -------------------------------------------------------------------------------- /tree_test.go: -------------------------------------------------------------------------------- 1 | package sigma 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | "github.com/markuskont/datamodels" 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | func TestTreeParse(t *testing.T) { 12 | for _, c := range parseTestCases { 13 | var rule Rule 14 | if err := yaml.Unmarshal([]byte(c.Rule), &rule); err != nil { 15 | t.Fatalf("tree parse case %d failed to unmarshal yaml, %s", c.ID, err) 16 | } 17 | p, err := NewTree(RuleHandle{Rule: rule, NoCollapseWS: c.noCollapseWSNeg}) 18 | if err != nil { 19 | t.Fatalf("tree parse case %d failed: %s", c.ID, err) 20 | } 21 | // Positive cases 22 | for i, c2 := range c.Pos { 23 | var obj datamodels.Map 24 | if err := json.Unmarshal([]byte(c2), &obj); err != nil { 25 | t.Fatalf("rule parser case %d positive case %d json unmarshal error %s", c.ID, i, err) 26 | } 27 | m, _ := p.Match(obj) 28 | if !m { 29 | t.Fatalf("rule parser case %d positive case %d did not match", c.ID, i) 30 | } 31 | } 32 | // Negative cases 33 | for i, c2 := range c.Neg { 34 | var obj datamodels.Map 35 | if err := json.Unmarshal([]byte(c2), &obj); err != nil { 36 | t.Fatalf("rule parser case %d positive case %d json unmarshal error %s", c.ID, i, err) 37 | } 38 | m, _ := p.Match(obj) 39 | if m { 40 | t.Fatalf("rule parser case %d negative case %d matched", c.ID, i) 41 | } 42 | } 43 | } 44 | } 45 | 46 | // we should probably add an alternative to this benchmark to include noCollapseWS on or off (we collapse by default now) 47 | func benchmarkCase(b *testing.B, rawRule, rawEvent string) { 48 | var rule Rule 49 | if err := yaml.Unmarshal([]byte(parseTestCases[0].Rule), 
&rule); err != nil { 50 | b.Fail() 51 | } 52 | p, err := NewTree(RuleHandle{Rule: rule}) 53 | if err != nil { 54 | b.Fail() 55 | } 56 | var event datamodels.Map 57 | if err := json.Unmarshal([]byte(rawEvent), &event); err != nil { 58 | b.Fail() 59 | } 60 | for i := 0; i < b.N; i++ { 61 | p.Match(event) 62 | } 63 | } 64 | 65 | func BenchmarkTreePositive0(b *testing.B) { 66 | benchmarkCase(b, parseTestCases[0].Rule, parseTestCases[0].Pos[0]) 67 | } 68 | 69 | func BenchmarkTreePositive1(b *testing.B) { 70 | benchmarkCase(b, parseTestCases[1].Rule, parseTestCases[1].Pos[0]) 71 | } 72 | 73 | func BenchmarkTreePositive2(b *testing.B) { 74 | benchmarkCase(b, parseTestCases[2].Rule, parseTestCases[2].Pos[0]) 75 | } 76 | 77 | func BenchmarkTreePositive3(b *testing.B) { 78 | benchmarkCase(b, parseTestCases[3].Rule, parseTestCases[3].Pos[0]) 79 | } 80 | 81 | func BenchmarkTreePositive4(b *testing.B) { 82 | benchmarkCase(b, parseTestCases[4].Rule, parseTestCases[4].Pos[0]) 83 | } 84 | 85 | func BenchmarkTreePositive5(b *testing.B) { 86 | benchmarkCase(b, parseTestCases[5].Rule, parseTestCases[5].Pos[0]) 87 | } 88 | 89 | func BenchmarkTreePositive6(b *testing.B) { 90 | benchmarkCase(b, parseTestCases[6].Rule, parseTestCases[6].Pos[0]) 91 | } 92 | 93 | func BenchmarkTreeNegative0(b *testing.B) { 94 | benchmarkCase(b, parseTestCases[0].Rule, parseTestCases[0].Neg[0]) 95 | } 96 | 97 | func BenchmarkTreeNegative1(b *testing.B) { 98 | benchmarkCase(b, parseTestCases[1].Rule, parseTestCases[1].Neg[0]) 99 | } 100 | 101 | func BenchmarkTreeNegative2(b *testing.B) { 102 | benchmarkCase(b, parseTestCases[2].Rule, parseTestCases[2].Neg[0]) 103 | } 104 | 105 | func BenchmarkTreeNegative3(b *testing.B) { 106 | benchmarkCase(b, parseTestCases[3].Rule, parseTestCases[3].Neg[0]) 107 | } 108 | 109 | func BenchmarkTreeNegative4(b *testing.B) { 110 | benchmarkCase(b, parseTestCases[4].Rule, parseTestCases[4].Neg[0]) 111 | } 112 | 113 | func BenchmarkTreeNegative5(b *testing.B) {
114 | benchmarkCase(b, parseTestCases[5].Rule, parseTestCases[5].Neg[0]) 115 | } 116 | 117 | func BenchmarkTreeNegative6(b *testing.B) { 118 | benchmarkCase(b, parseTestCases[6].Rule, parseTestCases[6].Neg[0]) 119 | } 120 | --------------------------------------------------------------------------------