├── .gitignore
├── .gitlab-ci.yml
├── LICENSE
├── Makefile
├── README.md
├── make.bat
└── source
    ├── 12years.rst
    ├── _static
        └── ebook-cover.jpg
    ├── about-stamus.rst
    ├── about.rst
    ├── authors.rst
    ├── conf.py
    ├── dns-threat-hunting.rst
    ├── file-analysis.rst
    ├── flow-hunting.rst
    ├── foreword.rst
    ├── generic-hunting.rst
    ├── http-threat-hunting.rst
    ├── img
        ├── Suricata_SMB_Subobject.png
        ├── Suricata_TLS_in_JSON.png
        ├── Suricata_Timeline.png
        ├── alert-metadata.png
        ├── alert-sig-metadata.png
        ├── directionality-warning.png
        ├── missing-http.png
        ├── mixed-content.png
        ├── query-dsl.png
        ├── signatures-ordered.png
        ├── sn-3-to-1-without-negatives.png
        ├── sn-network-diagram.png
        ├── splunk-expired-tls.png
        ├── splunk-tls-cipher.png
        ├── stamus-backcover.jpg
        ├── stamus-background.jpg
        ├── stamus-logo.png
        ├── stamus-title.jpg
        ├── virustotal.png
        └── vscode-sample.png
    ├── index.rst
    ├── intro.rst
    ├── license.rst
    ├── practical-rules-writing.rst
    ├── smb-threat-hunting.rst
    ├── stamus.sty
    ├── suricata-as-ids.rst
    ├── tls-keywords.csv
    ├── tls-threat-hunting.rst
    ├── tools.rst
    └── write-performant-rules.rst


/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
 1 | pdf:  # CI job that builds the PDF and EPUB editions of the book
 2 |   image: sphinxdoc/sphinx-latexpdf
 3 |   script:
 4 |   - make latex  # Sphinx LaTeX builder; output is used from build/latex/ below
 5 |   - cd build/latex/
 6 |   - sed -e 's/chapter{Preface}/chapter*{Preface}/' -i thesecurityanalystsguidetosuricata.tex  # rewrite the Preface heading to the starred \chapter* form in the generated .tex
 7 |   - make  # runs the Makefile inside build/latex/ (presumably compiles the PDF collected below - confirm)
 8 |   - cd ../..
 9 |   - make epub  # EPUB output is collected from build/epub/ below
10 |   artifacts:
11 |     expire_in: 2 week
12 |     paths:
13 |       - "build/latex/*pdf"
14 |       - "build/epub/*epub"
15 | 
16 | print:  # CI job that builds the print variant of the PDF
17 |   image: sphinxdoc/sphinx-latexpdf
18 |   script:
19 |   - sed -e 's/   license//' -i source/about.rst  # removes the first "   license" match on each line of about.rst (presumably drops the license toctree entry for print - confirm)
20 |   - make latex PRINT=1  # NOTE(review): PRINT is not read by the top-level Makefile; presumably consumed by source/conf.py - confirm
21 |   - cd build/latex/
22 |   - sed -e 's/chapter{Preface}/chapter*{Preface}/' -i thesecurityanalystsguidetosuricata.tex  # rewrite the Preface heading to the starred \chapter* form in the generated .tex
23 |   - make
24 |   - mv thesecurityanalystsguidetosuricata.pdf thesecurityanalystsguidetosuricata-print.pdf  # rename so the "*-print.pdf" artifact glob below matches
25 |   artifacts:
26 |     expire_in: 2 week
27 |     paths:
28 |       - "build/latex/*-print.pdf"
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-ShareAlike 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |     wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More considerations
 52 |      for the public:
 53 |     wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution-ShareAlike 4.0 International Public
 58 | License
 59 | 
 60 | By exercising the Licensed Rights (defined below), You accept and agree
 61 | to be bound by the terms and conditions of this Creative Commons
 62 | Attribution-ShareAlike 4.0 International Public License ("Public
 63 | License"). To the extent this Public License may be interpreted as a
 64 | contract, You are granted the Licensed Rights in consideration of Your
 65 | acceptance of these terms and conditions, and the Licensor grants You
 66 | such rights in consideration of benefits the Licensor receives from
 67 | making the Licensed Material available under these terms and
 68 | conditions.
 69 | 
 70 | 
 71 | Section 1 -- Definitions.
 72 | 
 73 |   a. Adapted Material means material subject to Copyright and Similar
 74 |      Rights that is derived from or based upon the Licensed Material
 75 |      and in which the Licensed Material is translated, altered,
 76 |      arranged, transformed, or otherwise modified in a manner requiring
 77 |      permission under the Copyright and Similar Rights held by the
 78 |      Licensor. For purposes of this Public License, where the Licensed
 79 |      Material is a musical work, performance, or sound recording,
 80 |      Adapted Material is always produced where the Licensed Material is
 81 |      synched in timed relation with a moving image.
 82 | 
 83 |   b. Adapter's License means the license You apply to Your Copyright
 84 |      and Similar Rights in Your contributions to Adapted Material in
 85 |      accordance with the terms and conditions of this Public License.
 86 | 
 87 |   c. BY-SA Compatible License means a license listed at
 88 |      creativecommons.org/compatiblelicenses, approved by Creative
 89 |      Commons as essentially the equivalent of this Public License.
 90 | 
 91 |   d. Copyright and Similar Rights means copyright and/or similar rights
 92 |      closely related to copyright including, without limitation,
 93 |      performance, broadcast, sound recording, and Sui Generis Database
 94 |      Rights, without regard to how the rights are labeled or
 95 |      categorized. For purposes of this Public License, the rights
 96 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 97 |      Rights.
 98 | 
 99 |   e. Effective Technological Measures means those measures that, in the
100 |      absence of proper authority, may not be circumvented under laws
101 |      fulfilling obligations under Article 11 of the WIPO Copyright
102 |      Treaty adopted on December 20, 1996, and/or similar international
103 |      agreements.
104 | 
105 |   f. Exceptions and Limitations means fair use, fair dealing, and/or
106 |      any other exception or limitation to Copyright and Similar Rights
107 |      that applies to Your use of the Licensed Material.
108 | 
109 |   g. License Elements means the license attributes listed in the name
110 |      of a Creative Commons Public License. The License Elements of this
111 |      Public License are Attribution and ShareAlike.
112 | 
113 |   h. Licensed Material means the artistic or literary work, database,
114 |      or other material to which the Licensor applied this Public
115 |      License.
116 | 
117 |   i. Licensed Rights means the rights granted to You subject to the
118 |      terms and conditions of this Public License, which are limited to
119 |      all Copyright and Similar Rights that apply to Your use of the
120 |      Licensed Material and that the Licensor has authority to license.
121 | 
122 |   j. Licensor means the individual(s) or entity(ies) granting rights
123 |      under this Public License.
124 | 
125 |   k. Share means to provide material to the public by any means or
126 |      process that requires permission under the Licensed Rights, such
127 |      as reproduction, public display, public performance, distribution,
128 |      dissemination, communication, or importation, and to make material
129 |      available to the public including in ways that members of the
130 |      public may access the material from a place and at a time
131 |      individually chosen by them.
132 | 
133 |   l. Sui Generis Database Rights means rights other than copyright
134 |      resulting from Directive 96/9/EC of the European Parliament and of
135 |      the Council of 11 March 1996 on the legal protection of databases,
136 |      as amended and/or succeeded, as well as other essentially
137 |      equivalent rights anywhere in the world.
138 | 
139 |   m. You means the individual or entity exercising the Licensed Rights
140 |      under this Public License. Your has a corresponding meaning.
141 | 
142 | 
143 | Section 2 -- Scope.
144 | 
145 |   a. License grant.
146 | 
147 |        1. Subject to the terms and conditions of this Public License,
148 |           the Licensor hereby grants You a worldwide, royalty-free,
149 |           non-sublicensable, non-exclusive, irrevocable license to
150 |           exercise the Licensed Rights in the Licensed Material to:
151 | 
152 |             a. reproduce and Share the Licensed Material, in whole or
153 |                in part; and
154 | 
155 |             b. produce, reproduce, and Share Adapted Material.
156 | 
157 |        2. Exceptions and Limitations. For the avoidance of doubt, where
158 |           Exceptions and Limitations apply to Your use, this Public
159 |           License does not apply, and You do not need to comply with
160 |           its terms and conditions.
161 | 
162 |        3. Term. The term of this Public License is specified in Section
163 |           6(a).
164 | 
165 |        4. Media and formats; technical modifications allowed. The
166 |           Licensor authorizes You to exercise the Licensed Rights in
167 |           all media and formats whether now known or hereafter created,
168 |           and to make technical modifications necessary to do so. The
169 |           Licensor waives and/or agrees not to assert any right or
170 |           authority to forbid You from making technical modifications
171 |           necessary to exercise the Licensed Rights, including
172 |           technical modifications necessary to circumvent Effective
173 |           Technological Measures. For purposes of this Public License,
174 |           simply making modifications authorized by this Section 2(a)
175 |           (4) never produces Adapted Material.
176 | 
177 |        5. Downstream recipients.
178 | 
179 |             a. Offer from the Licensor -- Licensed Material. Every
180 |                recipient of the Licensed Material automatically
181 |                receives an offer from the Licensor to exercise the
182 |                Licensed Rights under the terms and conditions of this
183 |                Public License.
184 | 
185 |             b. Additional offer from the Licensor -- Adapted Material.
186 |                Every recipient of Adapted Material from You
187 |                automatically receives an offer from the Licensor to
188 |                exercise the Licensed Rights in the Adapted Material
189 |                under the conditions of the Adapter's License You apply.
190 | 
191 |             c. No downstream restrictions. You may not offer or impose
192 |                any additional or different terms or conditions on, or
193 |                apply any Effective Technological Measures to, the
194 |                Licensed Material if doing so restricts exercise of the
195 |                Licensed Rights by any recipient of the Licensed
196 |                Material.
197 | 
198 |        6. No endorsement. Nothing in this Public License constitutes or
199 |           may be construed as permission to assert or imply that You
200 |           are, or that Your use of the Licensed Material is, connected
201 |           with, or sponsored, endorsed, or granted official status by,
202 |           the Licensor or others designated to receive attribution as
203 |           provided in Section 3(a)(1)(A)(i).
204 | 
205 |   b. Other rights.
206 | 
207 |        1. Moral rights, such as the right of integrity, are not
208 |           licensed under this Public License, nor are publicity,
209 |           privacy, and/or other similar personality rights; however, to
210 |           the extent possible, the Licensor waives and/or agrees not to
211 |           assert any such rights held by the Licensor to the limited
212 |           extent necessary to allow You to exercise the Licensed
213 |           Rights, but not otherwise.
214 | 
215 |        2. Patent and trademark rights are not licensed under this
216 |           Public License.
217 | 
218 |        3. To the extent possible, the Licensor waives any right to
219 |           collect royalties from You for the exercise of the Licensed
220 |           Rights, whether directly or through a collecting society
221 |           under any voluntary or waivable statutory or compulsory
222 |           licensing scheme. In all other cases the Licensor expressly
223 |           reserves any right to collect such royalties.
224 | 
225 | 
226 | Section 3 -- License Conditions.
227 | 
228 | Your exercise of the Licensed Rights is expressly made subject to the
229 | following conditions.
230 | 
231 |   a. Attribution.
232 | 
233 |        1. If You Share the Licensed Material (including in modified
234 |           form), You must:
235 | 
236 |             a. retain the following if it is supplied by the Licensor
237 |                with the Licensed Material:
238 | 
239 |                  i. identification of the creator(s) of the Licensed
240 |                     Material and any others designated to receive
241 |                     attribution, in any reasonable manner requested by
242 |                     the Licensor (including by pseudonym if
243 |                     designated);
244 | 
245 |                 ii. a copyright notice;
246 | 
247 |                iii. a notice that refers to this Public License;
248 | 
249 |                 iv. a notice that refers to the disclaimer of
250 |                     warranties;
251 | 
252 |                  v. a URI or hyperlink to the Licensed Material to the
253 |                     extent reasonably practicable;
254 | 
255 |             b. indicate if You modified the Licensed Material and
256 |                retain an indication of any previous modifications; and
257 | 
258 |             c. indicate the Licensed Material is licensed under this
259 |                Public License, and include the text of, or the URI or
260 |                hyperlink to, this Public License.
261 | 
262 |        2. You may satisfy the conditions in Section 3(a)(1) in any
263 |           reasonable manner based on the medium, means, and context in
264 |           which You Share the Licensed Material. For example, it may be
265 |           reasonable to satisfy the conditions by providing a URI or
266 |           hyperlink to a resource that includes the required
267 |           information.
268 | 
269 |        3. If requested by the Licensor, You must remove any of the
270 |           information required by Section 3(a)(1)(A) to the extent
271 |           reasonably practicable.
272 | 
273 |   b. ShareAlike.
274 | 
275 |      In addition to the conditions in Section 3(a), if You Share
276 |      Adapted Material You produce, the following conditions also apply.
277 | 
278 |        1. The Adapter's License You apply must be a Creative Commons
279 |           license with the same License Elements, this version or
280 |           later, or a BY-SA Compatible License.
281 | 
282 |        2. You must include the text of, or the URI or hyperlink to, the
283 |           Adapter's License You apply. You may satisfy this condition
284 |           in any reasonable manner based on the medium, means, and
285 |           context in which You Share Adapted Material.
286 | 
287 |        3. You may not offer or impose any additional or different terms
288 |           or conditions on, or apply any Effective Technological
289 |           Measures to, Adapted Material that restrict exercise of the
290 |           rights granted under the Adapter's License You apply.
291 | 
292 | 
293 | Section 4 -- Sui Generis Database Rights.
294 | 
295 | Where the Licensed Rights include Sui Generis Database Rights that
296 | apply to Your use of the Licensed Material:
297 | 
298 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
299 |      to extract, reuse, reproduce, and Share all or a substantial
300 |      portion of the contents of the database;
301 | 
302 |   b. if You include all or a substantial portion of the database
303 |      contents in a database in which You have Sui Generis Database
304 |      Rights, then the database in which You have Sui Generis Database
305 |      Rights (but not its individual contents) is Adapted Material,
306 | 
307 |      including for purposes of Section 3(b); and
308 |   c. You must comply with the conditions in Section 3(a) if You Share
309 |      all or a substantial portion of the contents of the database.
310 | 
311 | For the avoidance of doubt, this Section 4 supplements and does not
312 | replace Your obligations under this Public License where the Licensed
313 | Rights include other Copyright and Similar Rights.
314 | 
315 | 
316 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
317 | 
318 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
319 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
320 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
321 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
322 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
323 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
324 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
325 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
326 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
327 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
328 | 
329 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
330 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
331 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
332 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
333 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
334 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
335 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
336 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
337 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
338 | 
339 |   c. The disclaimer of warranties and limitation of liability provided
340 |      above shall be interpreted in a manner that, to the extent
341 |      possible, most closely approximates an absolute disclaimer and
342 |      waiver of all liability.
343 | 
344 | 
345 | Section 6 -- Term and Termination.
346 | 
347 |   a. This Public License applies for the term of the Copyright and
348 |      Similar Rights licensed here. However, if You fail to comply with
349 |      this Public License, then Your rights under this Public License
350 |      terminate automatically.
351 | 
352 |   b. Where Your right to use the Licensed Material has terminated under
353 |      Section 6(a), it reinstates:
354 | 
355 |        1. automatically as of the date the violation is cured, provided
356 |           it is cured within 30 days of Your discovery of the
357 |           violation; or
358 | 
359 |        2. upon express reinstatement by the Licensor.
360 | 
361 |      For the avoidance of doubt, this Section 6(b) does not affect any
362 |      right the Licensor may have to seek remedies for Your violations
363 |      of this Public License.
364 | 
365 |   c. For the avoidance of doubt, the Licensor may also offer the
366 |      Licensed Material under separate terms or conditions or stop
367 |      distributing the Licensed Material at any time; however, doing so
368 |      will not terminate this Public License.
369 | 
370 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
371 |      License.
372 | 
373 | 
374 | Section 7 -- Other Terms and Conditions.
375 | 
376 |   a. The Licensor shall not be bound by any additional or different
377 |      terms or conditions communicated by You unless expressly agreed.
378 | 
379 |   b. Any arrangements, understandings, or agreements regarding the
380 |      Licensed Material not stated herein are separate from and
381 |      independent of the terms and conditions of this Public License.
382 | 
383 | 
384 | Section 8 -- Interpretation.
385 | 
386 |   a. For the avoidance of doubt, this Public License does not, and
387 |      shall not be interpreted to, reduce, limit, restrict, or impose
388 |      conditions on any use of the Licensed Material that could lawfully
389 |      be made without permission under this Public License.
390 | 
391 |   b. To the extent possible, if any provision of this Public License is
392 |      deemed unenforceable, it shall be automatically reformed to the
393 |      minimum extent necessary to make it enforceable. If the provision
394 |      cannot be reformed, it shall be severed from this Public License
395 |      without affecting the enforceability of the remaining terms and
396 |      conditions.
397 | 
398 |   c. No term or condition of this Public License will be waived and no
399 |      failure to comply consented to unless expressly agreed to by the
400 |      Licensor.
401 | 
402 |   d. Nothing in this Public License constitutes or may be interpreted
403 |      as a limitation upon, or waiver of, any privileges and immunities
404 |      that apply to the Licensor or You, including from the legal
405 |      processes of any jurisdiction or authority.
406 | 
407 | 
408 | =======================================================================
409 | 
410 | Creative Commons is not a party to its public
411 | licenses. Notwithstanding, Creative Commons may elect to apply one of
412 | its public licenses to material it publishes and in those instances
413 | will be considered the “Licensor.” The text of the Creative Commons
414 | public licenses is dedicated to the public domain under the CC0 Public
415 | Domain Dedication. Except for the limited purpose of indicating that
416 | material is shared under a Creative Commons public license or as
417 | otherwise permitted by the Creative Commons policies published at
418 | creativecommons.org/policies, Creative Commons does not authorize the
419 | use of the trademark "Creative Commons" or any other trademark or logo
420 | of Creative Commons without its prior written consent including,
421 | without limitation, in connection with any unauthorized modifications
422 | to any of its public licenses or any other arrangements,
423 | understandings, or agreements concerning use of licensed material. For
424 | the avoidance of doubt, this paragraph does not form part of the
425 | public licenses.
426 | 
427 | Creative Commons may be contacted at creativecommons.org.
428 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation: every target is forwarded to $(SPHINXBUILD) -M.
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two (they are assigned with ?=).
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option (-M); the target name is passed as $@.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # The Security Analyst’s Guide to Suricata
 2 | 
 3 | ## Introduction
 4 | 
 5 | Stamus Networks is pleased to offer the first ‘open source’ Suricata-focused book, ``The Security Analyst’s Guide to Suricata``.
 6 | Written by the founders of [Stamus Networks](https://www.stamus-networks.com/), Éric Leblond and Peter Manev, this project provides SOC analysts and threat
 7 | hunters with information on entry points and in-depth coverage for the most important Suricata features.
 8 | 
 9 | This book is not intended to replace the [Suricata manual](https://suricata.readthedocs.io/en/latest/), which is
10 | a valuable source of information and should be used as a reference tool by Suricata users. Instead, its unique ‘open source’
11 | format will grow and evolve over time with ongoing input from Éric and Peter as well as contributions and feedback from
12 | the Suricata community.
13 | 
14 | ## Contribution
15 | 
16 | We are listing the code/text for the book here, but you can also find the latest published
17 | version of the book at [Stamus Networks](https://www.stamus-networks.com/suricata-4-analysts?utm_source=SN-GitHub&utm_medium=web&utm_campaign=Suricata-4-Analysts). We welcome contributions to the book and you can
18 | propose updates and provide feedback on this GitHub repository. If you want to contribute a specific
19 | topic, please [check existing issues](https://github.com/StamusNetworks/suricata-4-analysts/issues) and, if needed, open one describing your addition so we can tell you if it is
20 | in line with the content we have planned for the book and has a chance of being accepted.
21 | 


--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation: forwards the first argument to sphinx-build -M as the build target, or shows the Sphinx help when no argument is given.
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.https://www.sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/source/12years.rst:
--------------------------------------------------------------------------------
  1 | 13 years of innovation
  2 | ----------------------
  3 | 
  4 | Suricata 1.0 (July 2010) - Welcome to the HTTP World
  5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  6 | 
  7 | Understanding the HTTP protocol was by far the most important breakthrough in the first release. Suricata 1.0, published in July 2010 after two years of development, was able to read a Snort ruleset and could also use a series of new keywords to look for content in the protocol fields of HTTP using a port-agnostic approach.
  8 | 
  9 | For the first time, a signature could ask for a specific protocol field without having to do the protocol parsing by itself. Because of this, the complexity of the writing of signatures was decreased while, at the same time, performance was increased. Techniques such as multi-pattern matching enabled accelerated searches in these specific fields.
 10 | 
 11 | Another important feature of Suricata 1.0 was protocol recognition. The engine analyzes the beginning of the exchange on a stream to find out what protocol it is - completely independent of the Layer 4 port. This port-agnostic feature had a big impact in terms of detection rate, as a lot of malware at that time was using a high numbered port to connect to command and control servers and HTTP to exchange information. By being able to find HTTP independently of the port - a big accomplishment - it allowed Suricata to accurately detect the malware.
 12 | 
 13 | Suricata also offered multi-step detection thanks to the inclusion of keywords that were the first step toward overcoming the low expressivity of the signature language inherited from Snort.
 14 | 
 15 | For example, the “flowbits” keyword family provided a way to pass information between signatures, and thus allow users to construct a state engine. While it was limited to the description of the state inside a single flow, it was real progress.
 16 | 
 17 | One other feature of this first release broke the previously strict definition of what an IDS was: logging HTTP requests to a file. This was not in the initial specifications, but it turned out to not be too complex to build and did not have a major impact on performance. This opportunistic approach continues to uniquely define all Suricata development.
 18 | 
 19 | 
 20 | Suricata 1.2 (January 2012) - File Extraction
 21 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 22 | 
 23 | File transaction info was added with version 1.2, which was released in January 2012, and then was extended in version 1.3, which was released six months later. Understanding the HTTP protocol gave Suricata the visibility to see what was transmitted in requests, so it was natural to perform an extraction of the transmitted files.
 24 | 
 25 | This was added in version 1.2, along with file checksum computation and file transaction logging. In Suricata 1.3, the keyword “filemd5” was added to verify if the md5 checksum of the transmitted file was present in a list stored in a file. The feature would be extended later to sha1 and sha256, with the “filesha1” and “filesha256” keywords.
 26 | 
 27 | Note: extraction of files using SMTP protocol was contributed by BAE Systems in Suricata 2.1.
 28 | 
 29 | 
 30 | Suricata 1.3 (July 2012) - Transport Layer Security (TLS)
 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 32 | 
 33 | In July 2012, Suricata 1.3 was released and with it came support for TLS -- contributed by Pierre Chifflier working for ANSSI, the French agency responsible for cyberdefense. This TLS implementation does not include decryption but is instead an analysis of the TLS handshake with extraction of unique transaction characteristics such as certificate subject, issuer, and its fingerprint.
 34 | 
 35 | At this stage, it is clear that Suricata is shifting away from the classic role of IDS as the presenter of simple data. The system is embracing complex decoding and extracting data that is simply not visible to the naked eye. This began with HTTP message decompression and continued from this point forward.
 36 | 
 37 | This TLS support is now used to address the evolution of malware that began to use encrypted communication. For example, currently available signatures now readily detect connections to servers using default OpenSSL configurations.
 38 | 
 39 | Dedicated TLS keywords were also added with this release, and all TLS events are logged into a dedicated file.
 40 | 
 41 | This mixed approach - using both IDS and network security monitoring (NSM) at the same time - builds upon what was done with HTTP and will prove to become the standard going forward for each new protocol supported: adding the dynamic protocol identification, logging events, offering dedicated keywords, and extracting files.
 42 | 
 43 | The evolution of TLS support has continued over multiple versions of Suricata.
 44 | 
 45 | 
 46 | Suricata 1.4 (December 2012) - Support for Lua
 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 48 | 
 49 | With the release of Suricata 1.4 in December 2012, Suricata added a second major signature language in Lua, a lightweight, multi-paradigm programming language designed primarily for embedded use in applications.
 50 | 
 51 | Signatures could now include a Lua script as a feature. This script uses Suricata-exposed buffers such as the packet content or the TLS information, and its return value is 1 for a match and 0 for no match. The Lua script may also create or modify flowbits variables.
 52 | 
 53 | With this added capability, Suricata now had a real programming language that could be used by the system to save states. This opened up a range of possibilities. The Lua support, for instance, could be used to write a very accurate signature to detect Heartbleed attack attempts. In fact, that signature was available a few hours after an attack was announced, and it would be the only IDS signature-based approach to provide accurate detection of Heartbleed.
 54 | 
 55 | Unfortunately, Lua support did not have the success that the development team had expected - and for a trivial reason. In order to be evaluated with the signature, the Lua script for a signature must be inserted as a file next to the signatures file. But adding this type of file was not supported by the existing signature/rule management tools, and no major threat research organizations distributed signatures with Lua for this simple reason. Interest in Lua still exists today, and the increased activity around signature management tools means there is still some hope for the Lua signatures.
 56 | 
 57 | 
 58 | Suricata 2.0 (March 2014) - Welcome JSON
 59 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 60 | 
 61 | Suricata 2.0 was published in March of 2014, marking a major milestone in the evolution of Suricata. This came with the addition of JSON as the preferred format for Suricata-generated events. Thanks to this contribution from Tom Decanio, the project was finally leaving the dated format of the 1990s as JSON replaced the non-structured text format or binary format such as that seen in unified2. JSON provided an easy-to-extend and easy-to-use format for all Suricata events.
 62 | 
 63 | Thanks to JSON formatting, sending Suricata-generated data to tools such as the Elastic stack or Splunk was easy to do. Suricata 2.0 came with a native “correlation” capability based on consistent field names across event types: for example, a source IP is always found in the “src_ip” field. On top of that, all events can now be found in one file (by default), containing, for example, different types of logs and alerts and/or separate DNS, SSH, TLS, HTTP transactions, and even performance data.
 64 | 
 65 | .. figure:: img/Suricata_TLS_in_JSON.png
 66 | 
 67 |    TLS event in JSON form.
 68 | 
 69 | On the intrusion detection side, having an alternative to the unified2 format was a big improvement. This binary format dedicated to alerts only supported IP fields, the payload, and basic information about the signature. Unfortunately, it was almost impossible to extend it to add more contextual information to the alerts.
 70 | 
 71 | Because Suricata now supported more protocols, it was possible to add contextual information to alerts. Being able to look at the extracted fields and run statistics on them has the potential to make the job of the analyst simpler and more efficient. 
 72 | 
 73 | For Suricata 2.1, this philosophy was embraced more completely, by adding application layer metadata in the alerts, starting with HTTP. The work on this feature continued throughout the release, and metadata was added for many other protocols. Later, in version 4.0, this logic was pushed further by adding the logging of the HTTP body. These fields are often compressed, so logging the content was not directly useful. Providing the decompressed data did, however, allow for direct analysis.
 74 | 
 75 | 
 76 | Suricata 3.0 (January 2016) - Debut of Xbits Keyword
 77 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 78 | 
 79 | Suricata 3.0 was published in January 2016, with the primary new feature being the “xbits” keyword. The concept of xbits is to go beyond the limitations of flowbits, which could not be used in multi-flow attacks. Xbits is an evolution of flowbits, in which the variable is attached to an IP address or to an IP pair. Signatures can then collaborate inside a state machine that is not limited to a single flow.
 80 | 
 81 | 
 82 | Suricata 4.0 (July 2017) - In Rust we Trust
 83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 84 | 
 85 | In addition to support for a number of new protocols, Suricata 4.0 introduced a more secure and efficient common parsing technique into the core. Using a combination of the Rust language and Nom parser (see https://github.com/Geal/nom), it set the stage for the rapid increase in the protocols supported by Suricata without sacrificing security and stability of the engine. This will prove critical for paving the way for the complete NSM functionality.
 86 | 
 87 | On the functional side, Network File System (NFS) and Network Time Protocol (NTP) were the two big protocol additions in version 4.0. 
 88 | 
 89 | Support for several other new protocols - specifically Server Message Block (SMB) and Dynamic Host Configuration Protocol (DHCP) - was added in release 4.1. These are mainly used in internal networks and with their support Suricata can more effectively analyze internal traffic, providing two primary benefits: first, increased visibility in encrypted environments; second, more complete detection of threats as they move laterally within a network.
 90 | 
 91 | 
 92 | Suricata 4.1 (December 2018) - Samba Time
 93 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 94 | 
 95 | The major highlight of Suricata 4.1 was the support for the SMB protocol family. Complete protocol support was added, including dedicated keywords, metadata logging, and file extraction. The impact on the deployment of Suricata on internal traffic has been significant. The metadata records are complete and enable the creation of a fine-grained analysis strategy. The following event is an example of a transaction on a share:
 96 | 
 97 | .. figure:: img/Suricata_SMB_Subobject.png
 98 |    :scale: 40 %
 99 | 
100 |    SMB sub object in an smb event.
101 | 
102 | 
103 | Suricata 5 (October 2019) - Introduction of Datasets
104 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
105 | 
106 | The introduction of datasets was the primary enhancement included with Suricata version 5, released in October 2019. This added the ability to match on a list of more than 50 different buffers and check a list of hostnames against a “known bad” database in the HTTP hostname, TLS Server Name Indication, or an HTTP user agent list.
107 | 
108 | It is important to note that these lists may include anywhere from a few items to millions of them without degrading the system performance. This is a key feature, considering the trend toward threat intelligence sharing and the use of tools such as MISP.
109 | 
110 | Another interesting aspect of datasets is Suricata’s capability to add and delete elements from a set by triggering changes with signatures. This feature has, for example, been used to create a learned list, tracking what is seen on the network and when and to build a new class of machine-learning based detection.
111 | 
112 | 
113 | Suricata 6 (October 2020) - Additional Protocol Support
114 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115 | 
116 | The primary contribution of Suricata 6 was an increase in the body of supported protocols. From a user perspective, the introduction of HTTP/2 support was critical. Given that almost half of the top 10 million websites support this protocol, it was essential for Suricata to be able to log HTTP/2 protocol transactions and run threat detection on them.
117 | 
118 | This version also added support for other important protocols, including Message Queuing Telemetry Transport (MQTT, contributed by DCSO) for Internet of Things (IoT) environments and Remote Frame Buffer (used for remote desktop sessions).
119 | 
120 | Although it was an “under the hood” feature, the switch to an internally developed JSON generator in Suricata 6 is worth mentioning. With users deploying Suricata in 100 Gbps environments and with application layer logging being an important feature, the number of events per second generated can be quite high. For example, it is not uncommon for a 100 Gbps deployment to generate hundreds of thousands of alert events per second on a single probe. As a consequence, the generation of JSON events using the original libjansson library ended up being a bottleneck. With Suricata 6, this was replaced by a custom JSON generator written in Rust which significantly lowers the performance burden of logging.
121 | 
122 | 
123 | Suricata 7 (July 2023)  - Maturity
124 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125 | 
126 | Three years passed between versions 6 and 7, which was, like Covid-19, unexpected. This release features a big set of improvements.
127 | Some long-standing requests, such as the parsing, logging, and detection of TLS client certificates as well as conditional pcap, have been addressed.
128 | The conditional pcap feature has been a common request for multiple years. The concept is to generate a pcap file on an alert in order to have evidence and low level data for investigation. Full packet capture has been available for years, but in terms of storage this is incredibly costly. With conditional packet capture the storage is limited to packets belonging to flows with an alert.
129 | 
130 | On the performance side, two new packet capture mechanisms have been added. AF_XDP and DPDK bring significant performance improvements at
131 | really high speeds. Both provide kernel bypass to avoid unnecessary processing in the kernel, as all that matters to Suricata is the packet on the wire
132 | rather than its routing. AF_XDP is available in recent Linux kernels, while DPDK requires a custom driver and a specific NIC such as those provided by Intel.
133 | 


--------------------------------------------------------------------------------
/source/_static/ebook-cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/_static/ebook-cover.jpg


--------------------------------------------------------------------------------
/source/about-stamus.rst:
--------------------------------------------------------------------------------
  1 | About Stamus Networks
  2 | =====================
  3 | 
  4 | Stamus Networks is the global leader in enterprise class Suricata-based network security.
  5 | 
  6 | Stamus Networks was founded in Paris, France, in 2014 by Éric Leblond and Peter Manev, who wanted to build a company committed to developing powerful tools that help defenders do their jobs more easily and more effectively. They selected the company name based on the Latin word “Stamus” which means “we stand” as a reminder of that promise.
  7 | 
  8 | A Company Built on Suricata Expertise
  9 | -------------------------------------
 10 | 
 11 | Éric is a board member and Peter is an executive team member of the Open Information Security Foundation (OISF). Both are active contributors to the Suricata project, the open source high-performance intrusion detection (IDS) and network security monitoring (NSM) engine which is governed by the OISF. Stamus Networks supports the Open Information Security Foundation through its membership in the OISF Consortium.
 12 | 
 13 | Under the leadership of Éric and Peter, Stamus Networks applies its extensive Suricata and network security technology experience to develop its advanced network-based threat detection and response solutions.
 14 | 
 15 | The company's team of world-class network security and open source experts apply critical new technologies such as advanced analytics and machine learning to this strong foundation while preserving the openness, configurability, interoperability, and transparency that their customers have come to expect from their current security infrastructure.
 16 | 
 17 | Other Open Source Contributions
 18 | -------------------------------
 19 | 
 20 | Stamus Networks has a long history of developing and supporting open source technologies.
 21 | 
 22 | In addition to extensive contributions to Suricata itself, the team at Stamus Labs has a number of active projects underway.
 23 | 
 24 |  - Since 2014, Stamus Networks has developed and maintained SELKS, the popular turnkey Suricata-based open source intrusion detection system (IDS), Network Security Monitoring (NSM), and threat hunting system. Whilst building SELKS, Stamus Networks created nearly 30 dashboards comprised of over 400 visualizations for use with data generated by Suricata with any Elastic stack. These dashboards can also be used separately.
 25 |  - In 2020, Stamus Networks introduced the Stamus App for Splunk which is a free and open source Splunk app for investigating and hunting in the IDS alert data and protocol transaction logs generated by Suricata sensors. This Splunk app also provides complete access to data from Clear NDR Enterprise.
 26 |  - Also in 2020, Stamus Networks introduced GopherCAP, an innovative PCAP manipulation application that provides accurate playback of extra large PCAP files directly from tar archives.
 27 |  - In January 2022, Stamus Networks introduced the Suricata Language Server, a tool that adds syntax checking, performance guidance, and auto-completion to popular text editors for Suricata signature developers.
 28 |  - In November 2022, the company published the "Security Analyst's Guide to Suricata" - the world's first practical guide for unlocking the full potential of Suricata. Co-authored by Éric and Peter, it is written for security operations center (SOC) analysts and threat hunters who use Suricata to gain insights into what is taking place on their networks. The book provides vital information on entry points and in-depth analysis on the most important Suricata features.
 29 |  - Also, in November 2022, Stamus Networks introduced a free Suricata ruleset specifically focused on detecting lateral movement in Microsoft Windows environments.
 30 |  - In 2023, Stamus Networks introduced Jupyter Playbooks for Suricata.
 31 |  - Also in 2023, Stamus Networks introduced a set of free newly-registered domain threat intelligence feeds, optimized for Suricata.
 32 |  - In 2024, Stamus Networks announced Clear NDR Community, a free and open source version of Clear NDR Enterprise that is the successor of SELKS.
 33 | 
 34 | Visit the `Stamus Labs page <https://www.stamus-networks.com/stamus-labs>`_ to learn more.
 35 | 
 36 | Stamus Networks Today
 37 | ---------------------
 38 | 
 39 | Today, Stamus Networks supports customers in 13 countries and maintains offices in eight countries around the world. The company is focused on helping enterprise security teams accelerate their response to threats with solutions that uncover serious and imminent risk from cloud and on-premise network activity. The company’s advanced network detection and response (NDR) systems expose threats to critical assets and empower rapid response.
 40 | 
 41 | Stamus Networks is a trusted partner to many of the world’s most targeted organizations, including government CERTs, central banks, insurance providers, managed security service providers, financial service providers, multinational government institutions, broadcasters, travel and hospitality companies, and even a market-leading cybersecurity SaaS vendor.
 42 | 
 43 | Unlike other network security companies, Stamus Networks delivers truly useful detection with explainable results at enterprise scale by applying the right technologies to the right problems, while avoiding the hype, fear and exaggeration that is often employed by security vendors.
 44 | 
 45 | Technology Proven in NATO Cyber Exercises
 46 | -----------------------------------------
 47 | 
 48 | For more than five years, Stamus Networks and Clear NDR have been invited by the NATO Cooperative Cyber Defence Centre of Excellence (CCDCOE) to join their annual Locked Shields and Crossed Swords live fire exercises where the company refines and validates its latest threat detection capabilities.
 49 | 
 50 | The Clear NDR Enterprise™ (Clear NDR)
 51 | -------------------------------------
 52 | Clear NDR Enterprise™ (Clear NDR) is an open network detection and response solution (NDR) built on a Suricata foundation that delivers actionable network visibility and powerful threat detection to enterprise security teams.
 53 | 
 54 | By combining the best capabilities of intrusion detection systems (IDS), network security monitoring (NSM), and network detection and response (NDR), Clear NDR Enterprise is a new generation of NDR that helps organizations expose serious threats and unauthorized activity lurking in their network.
 55 | 
 56 | This helps reduce tool sprawl and helps organizations meet their governance, risk, compliance, and operational security challenges with a single consolidated solution.
 57 | 
 58 | 
 59 | .. image:: img/sn-3-to-1-without-negatives.png
 60 | 
 61 | 
 62 | Organizations select Clear NDR Enterprise for one or more of the unique capabilities it brings them:
 63 | 
 64 |  - **Greater visibility into threats & activity** - By using multiple detection technologies and guided threat hunting users uncover even the weakest attack signals and unauthorized activities hiding in the network.
 65 |  - **Transparent detections with detailed evidence** - Understand exactly what triggered an event with a detailed attack timeline along with all the evidence needed to respond quickly and stop a breach before damage is done.
 66 |  - **Open and extensible for any environment** - Users can augment Clear NDR’s built-in detections with third party threat intel and signatures or develop their own custom detections. Clear NDR can easily integrate into nearly any existing security tech stack.
 67 |  - **Built for enterprise-scale operations** - Clear NDR scales from a small stand-alone instance to multi-site, multi-100Gbps deployments integrated into the SOC/SIEM/SOAR while tracking activity of millions of hosts.
 68 |  - **Optional air-gapped deployment** - Optionally deploy our central analytics system on premise or in your datacenter, so you do not ship sensitive information to a SaaS-based system. And Clear NDR may even be deployed in a completely air-gapped environment.
 69 |  - **Use Stamus Network Probes or existing Suricata sensors** - Use Clear NDR to supercharge existing Suricata deployments.  Organizations can start with their existing Suricata sensors, while transitioning to the more advanced Stamus Network Probes.
 70 | 
 71 |  The network "perimeter" has expanded dramatically and so has the attack surface in most organizations. In order to eliminate blind spots, it is crucial to monitor the network for east-west and north-south traffic at all these sites. Clear NDR Enterprise™ is designed to do just that.
 72 | 
 73 | Clear NDR Enterprise consists of two components: Stamus Network Probe(s) and Stamus Central Server which may be deployed in private cloud, public cloud, on-premise, or hybrid environments.
 74 | 
 75 | .. image:: img/sn-network-diagram.png
 76 | 
 77 | Stamus Network Probes
 78 | ~~~~~~~~~~~~~~~~~~~~~
 79 | 
 80 | Stamus Network Probes™ inspect and analyze all network traffic using deep packet inspection (DPI) to perform real-time threat detection, enrich the resulting events with extensive metadata, and capture network protocol transactions, flow data, extracted files, and full packet capture (PCAPs).
 81 | 
 82 | The probe delivers all this data to the Stamus Central Server™ for additional analytics, processing, and another layer of threat detection.
 83 | 
 84 | Stamus Central Server
 85 | ~~~~~~~~~~~~~~~~~~~~~
 86 | 
 87 | Stamus Central Server™ provides the centralized management of the probes, third party threat intelligence and rulesets, consolidated event storage, and a central integration point.
 88 | 
 89 | It includes an additional layer of machine learning and algorithmic threat detection, along with automated event triage – enabled by tagging and classification. Finally, the Stamus Central Server provides a powerful threat hunting and incident investigation user interface.
 90 | 
 91 | Straightforward Licensing
 92 | ~~~~~~~~~~~~~~~~~~~~~~~~~
 93 | 
 94 | Pricing for Clear NDR Enterprise is based on the number of links being monitored and the speed of those links. This simple licensing model supports an unlimited number of users and hosts.
 95 | 
 96 | Clear NDR is available in two simple license tiers to fit the needs of the organization. The table below summarizes the differences between these two license tiers.
 97 | 
 98 | 
 99 | 
100 | +------------------------------------------------------------------+---------------------+----------------------+
101 | |                                                                  | Clear NDR Community | Clear NDR Enterprise |
102 | +==================================================================+=====================+======================+
103 | | Signature and reputation list-based threat detection             | ✓                   | ✓                    |
104 | +------------------------------------------------------------------+---------------------+----------------------+
105 | | Flow and protocol-based data enrichment and event capture        | ✓                   | ✓                    |
106 | +------------------------------------------------------------------+---------------------+----------------------+
107 | | Tagging and classification for auto event triage                 | ✓                   | ✓                    |
108 | +------------------------------------------------------------------+---------------------+----------------------+
109 | | Guided threat hunting                                            | ✓                   | ✓                    |
110 | +------------------------------------------------------------------+---------------------+----------------------+
111 | | Machine learning and algorithmic detection engines               |                     | ✓                    |
112 | +------------------------------------------------------------------+---------------------+----------------------+
113 | | Stamus threat intelligence and customized detection              |                     | ✓                    |
114 | +------------------------------------------------------------------+---------------------+----------------------+
115 | | Asset-oriented insights                                          |                     | ✓                    |
116 | +------------------------------------------------------------------+---------------------+----------------------+
117 | | Declarations of Compromise™ - high-fidelity threat notifications |                     | ✓                    |
118 | +------------------------------------------------------------------+---------------------+----------------------+
119 | | Declarations of Policy Violations                                |                     | ✓                    |
120 | +------------------------------------------------------------------+---------------------+----------------------+
121 | 
122 | Learn More
123 | ----------
124 | 
125 | Visit the `Stamus Networks website <https://www.stamus-networks.com/>`_ to learn more about the company and request a demonstration of Clear NDR Enterprise.
126 | 
127 | 
128 | 
129 | 


--------------------------------------------------------------------------------
/source/about.rst:
--------------------------------------------------------------------------------
1 | About
2 | =====
3 | 
4 | .. toctree::
5 | 
6 |    authors
7 |    license
8 | 


--------------------------------------------------------------------------------
/source/authors.rst:
--------------------------------------------------------------------------------
 1 | Authors and contributors
 2 | ========================
 3 | 
 4 | This document has been written by Éric Leblond and Peter Manev with the help of Mark Durrett, Dallon Robinette, and Phil Owens from Stamus Networks.
 5 | 
 6 | Éric Leblond
 7 | ------------
 8 | 
 9 | Éric Leblond is the co-founder and chief technology officer (CTO) of Stamus Networks and a member of the board of directors at Open Information Security Foundation (OISF). Éric has more than 15 years of experience as co-founder and technologist of cybersecurity software companies and is an active member of the security and open-source communities. He has worked on the development of Suricata – the open-source network threat detection engine – since 2009 and is part of the Netfilter Core team, responsible for the Linux kernel's firewall layer. Éric is also the lead developer of the Suricata Language Server, a real-time syntax checking and autocomplete app for Suricata rule writers. Éric is a well-respected expert and speaker on network security.
10 | 
11 | 
12 | Peter Manev
13 | -----------
14 | 
15 | Peter Manev is the co-founder and chief strategy officer (CSO) of Stamus Networks and a member of the executive team at Open Information Security Foundation (OISF). Peter has over 15 years of experience in the IT industry, including enterprise-level IT security practice. He is a passionate user, developer, and explorer of innovative open-source security software. He is responsible for training as well as quality assurance and testing on the development team of Suricata – the open-source threat detection engine. Peter is also the lead developer of SELKS, the popular turnkey open-source implementation of Suricata. Peter is a regular speaker and educator on open-source security, threat hunting, and network security.
16 | 


--------------------------------------------------------------------------------
/source/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | #
 13 | # import os
 14 | # import sys
 15 | # sys.path.insert(0, os.path.abspath('.'))
 16 | 
 17 | import os
 18 | 
 19 | # -- Project information -----------------------------------------------------
 20 | 
 21 | project = 'The Security Analyst’s Guide to Suricata'
 22 | copyright = '2021-2025, Stamus Networks'
 23 | author = 'Éric Leblond and Peter Manev'
 24 | 
 25 | # The full version, including alpha/beta/rc tags
 26 | release = '3.0.0'
 27 | version  = '3.0.0'
 28 | 
 29 | 
 30 | # -- General configuration ---------------------------------------------------
 31 | 
 32 | # Add any Sphinx extension module names here, as strings. They can be
 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 34 | # ones.
 35 | extensions = [
 36 | ]
 37 | 
 38 | # Add any paths that contain templates here, relative to this directory.
 39 | templates_path = ['_templates']
 40 | 
 41 | # List of patterns, relative to source directory, that match files and
 42 | # directories to ignore when looking for source files.
 43 | # This pattern also affects html_static_path and html_extra_path.
 44 | exclude_patterns = []
 45 | 
 46 | 
 47 | # -- Options for HTML output -------------------------------------------------
 48 | 
 49 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 50 | # a list of builtin themes.
 51 | #
 52 | html_theme = 'alabaster'
 53 | 
 54 | # Add any paths that contain custom static files (such as style sheets) here,
 55 | # relative to this directory. They are copied after the builtin static files,
 56 | # so a file named "default.css" will overwrite the builtin "default.css".
 57 | html_static_path = ['_static']
 58 | 
 59 | numfig = True
 60 | 
 61 | latex_show_urls = 'footnote'
 62 | 
 63 | latex_elements = {
 64 |     'pointsize': '10pt',
 65 |     'preamble': r'''
 66 | \usepackage{stamus}
 67 | ''',
 68 |     'maketitle': r'''
 69 | \makeatletter
 70 | \begin{titlepage}
 71 | \AddToShipoutPictureBG*{\BackgroundPic}
 72 | \py@HeaderFamily
 73 | \vspace*{4.5cm}
 74 | \hspace*{-0.9cm}
 75 | \begin{minipage}{16cm}
 76 | \textcolor{white}{
 77 | \textbf{
 78 | \raggedright
 79 | \begin{spacing}{1.1}
 80 | \fontsize{55}{65}\selectfont \@title
 81 | \end{spacing}
 82 | }
 83 | }
 84 | 
 85 | \vspace{-0.1cm}
 86 | \definecolor{title_yellow}{rgb}{0.984, 0.866, 0.004}
 87 | \textcolor{title_yellow}
 88 | {
 89 | \LARGE{By Éric Leblond \& Peter Manev}
 90 | }
 91 | \end{minipage}
 92 | \par
 93 | \vspace*{\fill}
 94 | \hspace*{-1.1cm}
 95 | \textcolor{white}{
 96 | \py@release \releaseinfo
 97 | }
 98 | \end{titlepage}
 99 | \vspace*{\fill}
100 | \textit{The Security Analyst’s Guide to Suricata} \\
101 | Copyright @ 2022-2023 by Éric Leblond and Peter Manev \\
102 | Published by Stamus Networks \\
103 | 450 E. 96th Street, Suite 500 \\
104 | Indianapolis, IN 46240 \\
105 | This work is licensed under Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license. \\
106 | ISBN: 979-8-9871510-1-3
107 | \makeatother
108 | \newpage
109 | \AddToShipoutPictureBG{\transparent{0.5}\includegraphics[width=\paperwidth,height=12cm]{stamus-background.jpg}}
110 | ''',
111 |     'printindex': r'''
112 | \renewcommand{\indexname}{Index}
113 | \printindex
114 | \cleartoleftpage
115 | \pagestyle{empty}
116 | \newgeometry{bottom=0.8cm}
117 | \vspace*{\fill}
118 | \ClearShipoutPictureBG
119 | \AddToShipoutPictureBG{\BackCoverPic}
120 | \sffamily
121 | \color{white}
122 | \hspace{-2.8cm}
123 | \begin{minipage}{12cm}
124 | \small
125 | \uppercase{About Stamus Networks}
126 | \vspace{0.3cm}
127 | 
128 | Stamus Networks believes in a world where defenders are heroes, and a 
129 | future where those they protect remain safe. As defenders face an onslaught
130 | of threats from well-funded adversaries, we relentlessly pursue solutions that
131 | make the defender’s job easier and more impactful. A global provider of
132 | high-performance network-based threat detection and response systems, 
133 | Stamus Networks helps enterprise security teams accelerate their response
134 | to critical threats with solutions that uncover serious and imminent risk
135 | from network activity. Our advanced network detection and response (NDR) 
136 | solutions expose threats to critical assets and empower rapid response.
137 | \vspace{0.8cm}
138 | 
139 | Copyright \copyright 2022-2023 \hspace{1cm} ISBN: 979-8-9871510-4-4
140 | \end{minipage}
141 | \hspace{0.5cm}
142 | \begin{minipage}{7cm}
143 | \small{
144 | \vspace*{1cm}
145 | \begin{tabular}{p{3.2cm}p{3.6cm}}
146 | \begin{center}
147 | 5 Avenue Ingres
148 | 
149 | 75016 Paris
150 | 
151 | France
152 | \end{center}
153 | &
154 | \begin{center}
155 | 450 E 96th St. Suite 500
156 | 
157 | Indianapolis, IN 46240
158 | 
159 | United States
160 | \end{center}
161 | \\
162 | \end{tabular}
163 | \begin{center}
164 | Mail: \href{mailto:contact@stamus-networks.com}{\textcolor{white}{contact@stamus-networks.com}}
165 | 
166 | Web: \href{https://www.stamus-networks.com}{\textcolor{white}{www.stamus-networks.com}}
167 | \end{center}
168 | }
169 | \end{minipage}
170 |     '''
171 | }
172 | 
173 | if os.getenv('PRINT'):
174 |     latex_elements = {
175 |         'geometry': r'''\usepackage[paperwidth=6.25in, paperheight=9.25in, top=0.625in, bottom=0.625in, left=0.875in, right=0.625in, includefoot, includehead]{geometry}''',
176 |         'pointsize': '10pt',
177 |         'maketitle': r'''\newcommand\sphinxbackoftitlepage{
178 | \vspace*{\fill}
179 | \textit{The Security Analyst’s Guide to Suricata} \\
180 | Copyright @ 2022-2023 by Éric Leblond and Peter Manev \\
181 | Published by Stamus Networks \\
182 | 450 E. 96th Street, Suite 500 \\
183 | Indianapolis, IN 46240 \\
184 | This work is licensed under Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license. \\
185 | Cover Design: Delin Design \\
186 | First printing: November, 2023 \\
187 | Printed in the United States of America \\
188 | Trade hardcover: ISBN: 979-8-9871510-3-7
189 | }
190 | \sphinxmaketitle
191 |         '''
192 |     }
193 | else:
194 |     latex_logo = "img/stamus-logo.png"
195 | 
196 | latex_additional_files = ["stamus.sty", "img/stamus-logo.png", "img/stamus-background.jpg", "img/stamus-title.jpg", "img/stamus-backcover.jpg"]
197 | 
198 | 
199 | epub_cover = ('_static/ebook-cover.jpg', '')
200 | epub_identifier = 'ISBN: 979-8-9871510-5-1'
201 | 
202 | today_fmt = '%B %Y'
203 | 


--------------------------------------------------------------------------------
/source/dns-threat-hunting.rst:
--------------------------------------------------------------------------------
  1 | ================================
  2 | DNS detection and threat hunting
  3 | ================================
  4 | 
  5 | Introduction
  6 | ============
  7 | 
  8 | DNS is everywhere, as its main feature of resolving host names to IP addresses is mandatory for almost all Internet traffic.
  9 | DNS protocol, however, is doing far more than that and interesting analysis can be done on some specific requests.
 10 | 
 11 | In most environments, DNS requests are relayed through the internal DNS servers. This property makes it really interesting
 12 | for exfiltration of data or tunnelling.
 13 | 
 14 | One last thing to mention on DNS: a request only shows an attempt. A DNS request to a domain proves that the domain was known and that a resolution took place, but not that the domain was actually reached afterwards. For example, the request may have been just a check, or it may have been triggered by the prefetch function of the browser, which resolves the domains present on a page even if the user never clicks on them.
 15 | 
 16 | 
 17 | Protocol overview
 18 | =================
 19 | 
 20 | In DNS protocol, the client queries a DNS server that is defined in its configuration to request information about a resource.
 21 | If the server is responsible (authoritative) for the resource attached to the request, it will answer directly to the client.
 22 | If the resource is not local then the DNS server will query a higher level DNS server that will have the answer or will query another even higher level server. This hierarchical approach and proxy by default behavior is really peculiar to this protocol and has some consequences.
 23 | 
 24 | If the capture point of the traffic is before the internet gateway, there is a high chance that the DNS traffic will come from an internal server. For example, in a Microsoft environment the Active Directory often serves as the first level DNS server for the computer in the domain. This is a problem with regards to the visibility as the real client IP address will be hidden behind the intermediate server.
 25 | 
 26 | DNS requests have multiple types. The most common ones are:
 27 | 
 28 |  - A: request the IPv4 address associated with a host name
 29 |  - AAAA: request the IPv6 address associated with a host name
 30 |  - MX: request the SMTP servers serving a domain name
 31 |  - SRV: ask for the service for a specific application and domain
 32 |  - TXT: the record is mostly used to provide key-value pairs for verification purposes, such as proving domain ownership
 33 | 
 34 | One request can have multiple answers. For example, if we ask for the SMTP servers of the domain `stamus-networks.com`
 35 | we have:
 36 | 
 37 | .. code-block::
 38 | 
 39 |    $ host -t MX stamus-networks.com
 40 |    stamus-networks.com mail is handled by 1 aspmx.l.google.com.
 41 |    stamus-networks.com mail is handled by 5 alt1.aspmx.l.google.com.
 42 |    stamus-networks.com mail is handled by 5 alt2.aspmx.l.google.com.
 43 |    stamus-networks.com mail is handled by 10 alt3.aspmx.l.google.com.
 44 |    stamus-networks.com mail is handled by 10 alt4.aspmx.l.google.com.
 45 | 
 46 | This definitely makes sense for SMTP servers as it allows for the definition of a hierarchy of servers and fail over.
 47 | 
 48 | But this is also true for a simple IPv4 request:
 49 | 
 50 | .. code-block::
 51 | 
 52 |     host -t A google.com
 53 |     google.com has address 142.250.147.113
 54 |     google.com has address 142.250.147.102
 55 |     google.com has address 142.250.147.101
 56 |     google.com has address 142.250.147.138
 57 |     google.com has address 142.250.147.139
 58 |     google.com has address 142.250.147.100
 59 | 
 60 | This potential asymmetry between request size and answer size, paired with the fact that requests are done over UDP, is used
 61 | in a `DNS amplification attack <https://www.cisa.gov/news-events/alerts/2013/03/29/dns-amplification-attacks>`_ where DNS
 62 | requests are sent with a spoofed source IP address (the victim's address), so that the victim receives all the answers sent by the DNS servers.
 63 | 
 64 | DNS analysis in Suricata
 65 | ========================
 66 | 
 67 | Suricata has extensive support of DNS protocol over TCP and UDP.
 68 | 
 69 | DNS requests and responses are logged in separate events.
 70 | 
 71 | The following event is a query because the `dns.type` value is `query` and the
 72 | query is an `A` (value of `dns.rrtype`) request to resolve the hostname
 73 | `germakhya.xyz` (value of `dns.rrname`):
 74 | 
 75 | .. code-block:: JSON
 76 | 
 77 |   {
 78 |   "timestamp": "2019-07-05T22:10:33.164698+0200",
 79 |   "flow_id": 425899832864145,
 80 |   "event_type": "dns",
 81 |   "src_ip": "10.7.5.101",
 82 |   "src_port": 50643,
 83 |   "dest_ip": "10.7.5.5",
 84 |   "dest_port": 53,
 85 |   "proto": "UDP",
 86 |   "dns": {
 87 |     "type": "query",
 88 |     "id": 62832,
 89 |     "rrname": "germakhya.xyz",
 90 |     "rrtype": "A",
 91 |     "tx_id": 0,
 92 |     "opcode": 0
 93 |     }
 94 |   }
 95 | 
 96 | The answer to the previous request is seen in the event below. `dns.type` is set to
 97 | `answer` and we can see that the `dns.id` field that stores the id of the DNS exchange
 98 | is set to the same number `62832`.
 99 | 
100 | .. code-block:: JSON
101 | 
102 |   {
103 |     "timestamp": "2019-07-05T22:10:33.369515+0200",
104 |     "flow_id": 425899832864145,
105 |     "event_type": "dns",
106 |     "src_ip": "10.7.5.101",
107 |     "src_port": 50643,
108 |     "dest_ip": "10.7.5.5",
109 |     "dest_port": 53,
110 |     "proto": "UDP",
111 |     "dns": {
112 |       "version": 2,
113 |       "type": "answer",
114 |       "id": 62832,
115 |       "flags": "8180",
116 |       "qr": true,
117 |       "rd": true,
118 |       "ra": true,
119 |       "opcode": 0,
120 |       "rrname": "germakhya.xyz",
121 |       "rrtype": "A",
122 |       "rcode": "NOERROR",
123 |       "answers": [
124 |         {
125 |           "rrname": "germakhya.xyz",
126 |           "rrtype": "A",
127 |           "ttl": 599,
128 |           "rdata": "95.142.46.236"
129 |         }
130 |       ],
131 |       "grouped": {
132 |         "A": [
133 |           "95.142.46.236"
134 |         ]
135 |       }
136 |     }
137 |   }
138 | 
139 | Two types of outputs containing the reply information are available and can be combined in answer events based on the configuration.
140 | `answers` displays the answers to the query with all parameters and the `grouped` output
141 | contains a list of values for every type of answers returned by the server.
142 | 
143 | DNS and detection
144 | =================
145 | 
146 | DNS keywords
147 | ------------
148 | 
149 | As of Suricata 7 there are two keywords dedicated to DNS: `dns.query` and `dns.opcode`.
150 | 
151 | `dns.query` is a sticky buffer checking the request value that is stored in the query event in the `dns.rrname` field.
152 | It can be used to match on DNS resolution and is therefore very useful to detect Indicators of Compromise (IoCs) in
153 | the traffic.
154 | 
155 | It is worth mentioning that a DNS request to a domain does not indicate a connection to a domain but rather
156 | the proximity to this domain. Techniques such as browser prefetch can trigger DNS resolution on hostnames that
157 | are not visited but are present on a visited page. Additionally, DNS requests from security analysts checking
158 | attacks must also be mentioned.
159 | 
160 | The DNS opcode matches the opcode that contains the type of operations. The most significant
161 | are:
162 | 
163 |   - Query (0) for regular request/answer operation (see `RFC1035 <https://www.rfc-editor.org/rfc/rfc1035.html>`_)
164 |   - Notify (4) for notification about a zone change (see `RFC1996 <https://www.rfc-editor.org/rfc/rfc1996.html>`_)
165 |   - Update (5) for DNS Zone update operation (see `RFC2136 <https://www.rfc-editor.org/rfc/rfc2136.html>`_)
166 |   - DSO (6) for DNS Stateful Operations, a protocol update defining persistent stateful sessions (see `RFC8490 <https://www.rfc-editor.org/rfc/rfc8490.html>`_)
167 | 
168 | While opcode 0 just indicates a regular exchange, the events with opcode 5 contain information about the update of zones and can
169 | indicate interesting changes in the infrastructure.
170 | 
171 | Cookbook
172 | --------
173 | 
174 | Match on a domain and its subdomains
175 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 | 
177 | For instance, if the domain `germakhya.xyz` and all its subdomains are considered at risk, a signature
178 | can be constructed over the following match:
179 | 
180 | .. code-block::
181 | 
182 |     dns.query; dotprefix; content:".germakhya.xyz"; endswith;
183 | 
184 | See :ref:`HTTP match on subdomains <match subdomains>` for explanations on usage of `endswith` and `dotprefix` keywords.
185 | 
186 | Hunting on DNS events
187 | =====================
188 | 
189 | SRV requests and infrastructure discovery
190 | -----------------------------------------
191 | 
192 | Requests of type `SRV` are defined in `RFC2782 <https://www.rfc-editor.org/rfc/rfc2782.html>`_ and allow
193 | users of the network to discover services. The following event is an example of an SRV request where the client
194 | asks for the service `_ldap._tcp.pdc._msdcs.fashionkings.com` (in field `dns.rrname`).
195 | 
196 | .. code-block:: JSON
197 | 
198 |   "timestamp": "2022-10-31T16:59:49.846977+0100",
199 |   "flow_id": 1667414482265188,
200 |   "event_type": "dns",
201 |   "src_ip": "172.16.0.153",
202 |   "src_port": 56559,
203 |   "dest_ip": "172.16.0.12",
204 |   "dest_port": 53,
205 |   "proto": "UDP",
206 |   "dns": {
207 |     "type": "query",
208 |     "id": 3038,
209 |     "rrname": "_ldap._tcp.pdc._msdcs.fashionkings.com",
210 |     "rrtype": "SRV",
211 |     "tx_id": 0,
212 |     "opcode": 0
213 |   }
214 | 
215 | The construct of the requested service is interesting as it contains a lot of information:
216 | 
217 |  - `_ldap._tcp` is the service from an applicative point of view
218 |  - `fashionkings.com` is the domain name.
219 |  - `_msdcs` indicates a domain controller query
220 |  - `pdc` is used to request the primary domain controller
221 | 
222 | See `Microsoft documentation on DNS-Based Discovery <https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-adts/7fcdce70-5205-44d6-9c3a-260e616a2f04>`_
223 | 
224 | As a consequence, the answer to this query will contain interesting information about the infrastructure:
225 | 
226 | .. code-block:: JSON
227 | 
228 |   "timestamp": "2022-10-31T16:59:49.847375+0100",
229 |   "flow_id": 1667414482265188,
230 |   "event_type": "dns",
231 |   "src_ip": "172.16.0.153",
232 |   "src_port": 56559,
233 |   "dest_ip": "172.16.0.12",
234 |   "dest_port": 53,
235 |   "proto": "UDP",
236 |   "dns": {
237 |     "version": 2,
238 |     "type": "answer",
239 |     "id": 3038,
240 |     "flags": "8580",
241 |     "opcode": 0,
242 |     "rrname": "_ldap._tcp.pdc._msdcs.fashionkings.com",
243 |     "rrtype": "SRV",
244 |     "rcode": "NOERROR",
245 |     "answers": [
246 |       {
247 |         "rrname": "_ldap._tcp.pdc._msdcs.fashionkings.com",
248 |         "rrtype": "SRV",
249 |         "ttl": 600,
250 |         "srv": {
251 |           "priority": 0,
252 |           "weight": 100,
253 |           "port": 389,
254 |           "name": "fashionkings-dc.fashionkings.com"
255 |         }
256 |       }
257 |     ],
258 | 
259 | Here we discover that the primary domain controller for the domain `fashionkings` is the host `fashionkings-dc.fashionkings.com` (field `dns.answers[0].srv.name`)
260 | and that it runs as expected on port 389 (field `dns.answers[0].srv.port`).
261 | 
262 | Finding guests on the network
263 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
264 | 
265 | The SRV requests can also be used to discover guests on the network. When a computer starts, it will try to connect
266 | to its configured Windows domain and to do that it will use DNS discovery. As a result, it will emit a DNS request
267 | that will contain a `dns.rrname` that will not be directed to the organization domain. The part after `_msdcs` will
268 | be the domain name the system is registered to.
269 | 
270 | This is usually a good technique to spot an unexpected laptop in a network.
271 | 
272 | DNS update
273 | ----------
274 | 
275 | Detecting DNS updates can be useful to spot unwanted behavior. This can
276 | be done in Splunk with the following query:
277 | 
278 | .. code-block::
279 | 
280 |   event_type="dns" dns.opcode=5 | top src_ip, dest_ip
281 | 
282 | This will give the list of peers where updates are taking place. As of version 7, Suricata does not have a complete
283 | parsing of the update messages so information obtained in the corresponding events will be poor.
284 | 
285 | DNS tunneling detection
286 | -----------------------
287 | 
288 | Most common DNS tunneling solutions use `TXT` records to transmit the data. They can
289 | be detected by statistical analysis. A simple stats query in Splunk could be a good
290 | hunt start:
291 | 
292 | .. code-block::
293 | 
294 |   event_type="dns" dns.rrtype="TXT" | stats count by src_ip
295 | 
296 | This query will output the IP addresses of the hosts that have done the most TXT requests
297 | in the network. If some high counts are reached (like thousands of requests) over a short period (like
298 | an hour) this may indicate that a DNS tunnel is active.
299 | 
300 | One enhancement of the previous approach is to use the average size of the dns event as
301 | a complementary selector. To send data via the tunnel, one of the protocol fields needs to 
302 | be used and as a result the size of the event should be higher than the norm. 
303 | 
304 | The following Splunk request gets all DNS queries and computes the size
305 | of the event, then gets statistics:
306 | 
307 | .. code-block::
308 | 
309 |   event_type="dns" dns.type="query" | eval esize=len(_raw)
310 |     | stats count, avg(esize) by src_ip | sort -count
311 | 
312 | In the table below we can see that the first IP (which has
313 | a DNS tunnel in place) exhibits vastly different numbers than
314 | a regular host (second entry).
315 | 
316 | +--------------+-------+---------+
317 | | IP address   | Count | Avg Size|
318 | +==============+=======+=========+
319 | | 192.168.3.1  | 18939 | 1414.44 |
320 | +--------------+-------+---------+
321 | | 172.16.1.152 | 150   | 574.28  |
322 | +--------------+-------+---------+
323 | 


--------------------------------------------------------------------------------
/source/file-analysis.rst:
--------------------------------------------------------------------------------
  1 | .. _file-analysis:
  2 | 
  3 | =============
  4 | File Analysis
  5 | =============
  6 | 
  7 | 
  8 | Introduction
  9 | ============
 10 | 
 11 | Because Suricata understands most major application layers, it is able to track the files transferred over the wire. The list of application layers supporting file extraction includes:
 12 | 
 13 |  - HTTP
 14 |  - FTP
 15 |  - SMB
 16 |  - NFS
 17 |  - SMTP
 18 |  - HTTP2 
 19 | 
 20 | Interesting features result from this. First, it allows Suricata to generate events containing information about the files. The :ref:`fileinfo events <fileinfo-events>` are generated once any tracked file transfer is over (independently of any detection). These events contain details about the file such as its name, various hashes of its content (sha1, sha256, ...), and identification of the file type based on its content.
 21 |  
 22 | The second interesting feature is the extraction of the file which is triggered by the `filestore <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html?#filestore>`_ keyword in a signature. Extraction can also be switched on globally, but it is really intensive in terms of performance. One thing to mention about extraction is that it is deduplicated as the storage of the file on the disk is done once per sha256.
 23 | 
 24 | The third feature associated with the file is the analysis of file content that can be done via the `file_data` keyword. Signatures can be written to match on the content of a file which, for example, can be compressed in the case of HTTP or under a base64 encoded form in the case of SMTP.
 25 | 
 26 | Please see the Suricata manual for how to set up `file extraction <https://suricata.readthedocs.io/en/latest/file-extraction/file-extraction.html>`_.
 27 | 
 28 | .. index:: Fileinfo event
 29 | 
 30 | .. _fileinfo-events:
 31 | 
 32 | 
 33 | Fileinfo events
 34 | ===============
 35 | 
 36 | The structure of a `fileinfo` event is as follows:
 37 | 
 38 | .. code-block:: JSON
 39 | 
 40 |   {
 41 |     "timestamp": "2019-07-05T22:01:04.745891+0200",
 42 |     "flow_id": 2209746386047329,
 43 |     "pcap_cnt": 33861,
 44 |     "event_type": "fileinfo",
 45 |     "src_ip": "5.188.168.49",
 46 |     "src_port": 80,
 47 |     "dest_ip": "10.7.5.101",
 48 |     "dest_port": 49686,
 49 |     "proto": "TCP",
 50 |     "community_id": "1:shQmhcocLIrJ1WtOAbgShXgB5FY=",
 51 |     "http": {
 52 |       "hostname": "5.188.168.49",
 53 |       "url": "/sin.png",
 54 |       "http_user_agent": "WinHTTP loader/1.0",
 55 |       "http_content_type": "image/png",
 56 |       "http_method": "GET",
 57 |       "protocol": "HTTP/1.1",
 58 |       "status": 200,
 59 |       "length": 110718
 60 |     },
 61 |     "app_proto": "http",
 62 |     "fileinfo": {
 63 |       "filename": "/sin.png",
 64 |       "magic": "PE32 executable (GUI) Intel 80386, for MS Windows",
 65 |       "gaps": false,
 66 |       "state": "CLOSED",
 67 |       "sha1": "2408c5380ddca2bbd53b87c27132b72f0927c70f",
 68 |       "sha256": "110743634989ed7a3293b2e39ad85c255fc131c752e029f78d37d4fb8c1dc7f6",
 69 |       "stored": false,
 70 |       "size": 369664,
 71 |       "tx_id": 1
 72 |     }
 73 |   }
 74 | 
 75 | The event contains a `fileinfo` object that contains the following fields:
 76 | 
 77 |  - `filename` announced by the servers
 78 |  - `magic` computed by analyzing the beginning of the file
 79 |  - `size` containing the file size
 80 | 
 81 | It also contains a regular `http` subobject as this file was captured on an HTTP flow. For a different application layer, a different subobject would have been present. The field `app_proto` is a good way to know which subobject will be present.
 82 | 
 83 | This event is a good example of the value of the various mechanisms in place in Suricata. The HTTP parser told us that the file content type (`http.http_content_type`) announced by the server is 'image/png'. This would be fine if the analysis of content of the file did not find out (in the key `fileinfo.magic`) that the file is, in reality, an executable. For reference, this file was used in an infection by the Trickbot malware.
 84 | 
 85 | This can be confirmed by checking the sha1 or sha256 hash of the file in `Virustotal <https://www.virustotal.com/gui/file/110743634989ed7a3293b2e39ad85c255fc131c752e029f78d37d4fb8c1dc7f6>`_. This file is flagged as malicious by more than 50 security vendors and associated to Trickbot by some of them as well.
 86 | 
 87 | .. figure:: img/virustotal.png
 88 |   
 89 |    Information from Virustotal on the file.
 90 | 
 91 | 
 92 | Detection on tracked files
 93 | ==========================
 94 | 
 95 | file.data keywords
 96 | ------------------
 97 | 
 98 | The `file.data` keyword matches on the content of the file, so it can be used to do an analysis of the content of the transferred file with the inspection capability of Suricata. This keyword is aliased to `file_data` (which is used in a lot of available signatures as it is the original name). The keyword alias `file.data` is a sticky buffer, so it will trigger matching on the file content for all subsequent match keywords.
 99 | 
100 | Let's take an example with the following signature from the Emerging Threats ruleset:
101 | 
102 | .. code-block::
103 | 
104 |   alert http $EXTERNAL_NET any -> $HOME_NET any ( \\
105 |     msg:"ET SCADA PcVue Activex Control Insecure method (AddPage)"; \\
106 |     flow:to_client,established; \\
107 |     file.data; content:"<OBJECT "; nocase; content:"classid"; nocase; distance:0; \\
108 |        content:"CLSID"; nocase; distance:0; \\
109 |        content:"083B40D3-CCBA-11D2-AFE0-00C04F7993D6"; nocase; distance:0; \\
110 |        content:".AddPage"; nocase; \\
111 |        content:"<OBJECT"; nocase; \\
112 |        pcre:"/^[^>]*?classid\s*=\s*[\x22\x27]?\s*clsid\s*\x3a\s*\x7B?\s*?083B40D3-CCBA-11D2-AFE0-00C04F7993D6/Rsi"; \\
113 |        reference:url,exploit-db.com/exploits/17896; classtype:attempted-user; \\
114 |        sid:2013730; rev:4; \\
115 |     )
116 | 
117 | This is triggering on https://www.exploit-db.com/exploits/17896 that is a DoS on an ActiveX control. This signature is over the HTTP protocol and it is using the `file.data` keyword. The reason is that HTTP servers usually compress the data they send to lower the bandwidth. As a result, a simple match on the raw content would have failed. By using a content match on `file.data`, we ensure a correct match on the content that is seen by the browser even if there is server-side compression as Suricata will uncompress the content to pass the clear text content to the `file.data` keyword.
118 | 
119 | The matching done in the signature is an interesting use of sticky buffer. It first does multiple content matches to check that all fixed string parts
120 | of the attack are there. This lowers the risk of evaluating the costly regular expression that is used as a final check for the presence of the
121 | attack in the server message.
122 | 
123 | 
124 | Magic analysis
125 | --------------
126 | 
127 | Among the keywords dealing with the file, we find `file.magic`. This is a sticky buffer matching on the result of Magic inspection.
128 | This can, for example, be used to detect the executables masqueraded as an image seen in the previous section:
129 | 
130 | .. code-block::
131 | 
132 |   alert http any any -> any any (msg:"masquerade file"; \\
133 |         http.content_type; content:"image"; \\
134 |         file.magic; content:"executable";)
135 | 
136 | Another simple possibility offered by `file.magic` is file extraction selection. For example, to extract all PDF to disk, one can use:
137 | 
138 | 
139 | .. code-block::
140 | 
141 |   alert tcp any any -> any any (msg:"PDF extraction"; \\
142 |         file.magic; content:"pdf"; nocase; \\
143 |         filestore;)
144 | 
145 | 
146 | Known bad and known good list
147 | -----------------------------
148 | 
149 | While file checksums are really interesting information found in the `fileinfo` events, they can also be matched on via the `filemd5 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html#filemd5>`_,
150 | `filesha1 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html#filesha1>`_, and 
151 | `filesha256 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html#filesha256>`_ keywords. All of these work the same way: they are given a file as an argument that has to contain one checksum per line and they will match if the checksum of the file is on the list (or not if the match is negated). 
152 | 
153 | For example, to alert on all executables that are not on the list of known good executables (built from another tool), one can use:
154 | 
155 | .. code-block::
156 | 
157 |   alert smb any any -> any any (msg:"Unknown executable file on SMB"; \\
158 |         filesha256:!sha256-goodexe; \\
159 |         file.name; content:".exe"; endswith; \\
160 |         sid:1; rev:1;)
161 | 
162 | 
163 | Threat hunting with file
164 | ========================
165 | 
166 | Masqueraded files
167 | -----------------
168 | 
169 | The masqueraded files described in :ref:`fileinfo-events` can be detected by looking at the `fileinfo` events.
170 | 
171 | In Elasticsearch, you can simply detect executables masqueraded as PDF with the following request:
172 | 
173 | .. code-block::
174 | 
175 |   fileinfo.filename.keyword:*.pdf AND fileinfo.magic:"executable"
176 | 
177 | You can also be more generic by querying all executables that do not end with a regular executable extension:
178 | 
179 | .. code-block::
180 | 
181 |   fileinfo.magic:"executable" -fileinfo.filename.keyword:*.exe -fileinfo.filename.keyword:*.dll -fileinfo.filename.keyword:*.com
182 | 
183 | And if you want to focus on internal protocols, you can do:
184 | 
185 | .. code-block::
186 | 
187 |    (app_proto:"smb" OR app_proto:"nfs") AND  \\
188 |       (fileinfo.magic:"executable" -fileinfo.filename.keyword:*.exe -fileinfo.filename.keyword:*.dll -fileinfo.filename.keyword:*.com)
189 | 
190 | Splunk users can write this last one with:
191 | 
192 | .. code-block::
193 | 
194 |    app_proto IN ("smb", "nfs") |
195 |      regex fileinfo.magic = "(?i)executable" |
196 |      search NOT (fileinfo.filename="*.exe" OR fileinfo.filename="*.dll" OR fileinfo.filename="*.com")
197 | 
198 | 
199 | Long file name
200 | --------------
201 | 
202 | The file names are usually kept short when they are linked to legitimate behavior because nobody likes to type
203 | or read lengthy strings. Because of this, it is interesting to look at any executable file transfer where the filename is
204 | at least 15 characters long and does not end with ".exe" (installers could have a longer name).
205 | 
206 | This can be done with:
207 | 
208 | .. code-block::
209 | 
210 |   fileinfo.magic:"executable" AND fileinfo.filename.keyword:/.{15}.*/  \\
211 |     -fileinfo.filename.keyword:*.exe
212 | 
213 | 
214 | Entropy on SMB file transfer
215 | ----------------------------
216 | 
217 | `Entropy <https://en.wikipedia.org/wiki/Entropy_(information_theory)>`_ is the next logical step after looking into a long filename because it measures the randomness of the data. In a lot of cases, malware uses randomly generated file names to avoid collision with existing files.
218 | 
219 | Entropy can be computed in Splunk by using the `URL Toolbox App <https://splunkbase.splunk.com/app/2734/>`_. For example, let's compute the entropy of the executable filenames and get the list of filenames sorted by entropy:
220 | 
221 | .. code-block::
222 | 
223 |   event_type=fileinfo app_proto=smb |
224 |   regex fileinfo.magic = "(?i)executable" |
225 |   `ut_shannon(fileinfo.filename)` |
226 |   eval entropy = round(ut_shannon, 2) |
227 |   stats min(timestamp), max(timestamp) by fileinfo.filename, entropy, fileinfo.sha256 |
228 |   sort -entropy
229 | 
230 | An entropy value of 4 is already high with regards to a filename, so filtering on value can allow you to focus on suspect elements.
231 | 
232 | 


--------------------------------------------------------------------------------
/source/flow-hunting.rst:
--------------------------------------------------------------------------------
  1 | =============
  2 | Flow Analysis
  3 | =============
  4 | 
  5 | Introduction
  6 | ============
  7 | 
  8 | Flow data, also known as `Netflow <https://en.wikipedia.org/wiki/NetFlow>`_ data, is a well known network analysis tool that has long been used for
  9 | security purposes. The concept, which was introduced by Cisco in 1996, centered around doing accounting on sessions. Basic information found in flow data includes the number of bytes and packets, the start and end time of a flow, and the IP information needed to identify the flow. 
 10 | 
 11 | The concept of a session is straightforward for TCP, but for sessionless protocols an approximation is used based on an internally defined timeout.
 12 | For example, a flow on UDP will be opened when a client sends data to a server from a given port and to a given port. It will end when no information
 13 | is sent one way or another for a predefined duration. The timeout really depends on the system collecting the information, and in the case of Suricata
 14 | it can be set up per protocol.
 15 | 
 16 | 
 17 | Flow events in Suricata
 18 | =======================
 19 | 
 20 | Suricata generates its own flow log independent of other events like alerts or protocol logs. It internally tracks UDP, TCP, SCTP, and ICMP for analysis purposes and
 21 | uses the collected information to generate entries for every flow.
 22 | 
 23 | There are two types of flow events:
 24 | - ``flow``: one entry per flow so volumetry for client and server data is available in the same event
 25 | - ``netflow``: two entries per flow, client and server accounting are in 2 different events
 26 | 
 27 | The ``flow_id`` key is obviously present in flow and can be used to correlate flow data with other events. In the case of netflow, it is used to correlate both sides of the flow.
 28 | 
 29 | A typical ``flow`` entry looks like the following:
 30 | 
 31 | .. code-block:: JSON
 32 | 
 33 |   {
 34 |     "timestamp": "2021-11-17T23:43:24.129401+0100",
 35 |     "flow_id": 1115914617724757,
 36 |     "in_iface": "enp5s0",
 37 |     "event_type": "flow",
 38 |     "src_ip": "2a02:1511:5172:1d50:3615:b3a2:a98a:c71f",
 39 |     "src_port": 34096,
 40 |     "dest_ip": "2a00:87c0:2021:2021:0050:0000:4000:0539",
 41 |     "dest_port": 443,
 42 |     "proto": "TCP",
 43 |     "app_proto": "tls",
 44 |     "flow": {
 45 |       "pkts_toserver": 3300,
 46 |       "pkts_toclient": 6684,
 47 |       "bytes_toserver": 334979,
 48 |       "bytes_toclient": 9887597,
 49 |       "start": "2021-11-17T23:32:59.915179+0100",
 50 |       "end": "2021-11-17T23:38:59.483429+0100",
 51 |       "age": 360,
 52 |       "state": "closed",
 53 |       "reason": "shutdown",
 54 |       "alerted": false
 55 |     },
 56 |     "tcp": {
 57 |       "tcp_flags": "1f",
 58 |       "tcp_flags_ts": "1e",
 59 |       "tcp_flags_tc": "1b",
 60 |       "syn": true,
 61 |       "fin": true,
 62 |       "rst": true,
 63 |       "psh": true,
 64 |       "ack": true,
 65 |       "state": "closed"
 66 |     }
 67 |   }
 68 | 
 69 | We find the traditional IP information (``src_ip``, ``src_port``, ``dest_ip``, ``dest_port``) and some information for the application layer
 70 | with the key ``app_proto`` that is here set to ``tls``. As this is a ``TCP`` flow, we have a ``tcp`` subobject that contains a set of keys. While
 71 | ``state`` encodes the state of the session in the TCP engine at the end of the flow, the rest of the keys encode information about the flags
 72 | seen on the TCP session. The boolean values are set to true if the corresponding flag has been seen. The three ``tcp_flags*`` keys contain the
 73 | hexadecimal value of the integer obtained by setting to 1 all flags seen during the life of the session. ``tcp_flags`` is global, and ``tcp_flags_ts`` stores the information for packets sent to the server while ``tcp_flags_tc`` stores the ones sent to the client.
 74 | 
 75 | .. note::
 76 | 
 77 |   Check the `eve Flow format <https://suricata.readthedocs.io/en/latest/output/eve/eve-json-format.html?highlight=http#event-type-flow>`_ page in Suricata manual for more information on the flow events.
 78 | 
 79 | 
 80 | Flow Analysis
 81 | =============
 82 | 
 83 | There are a great number of techniques that use flow data to find anomalies in the traffic by applying statistical analysis or machine learning to the events.
 84 | We are not going to cover this in this document, but we will showcase some simple examples.
 85 | 
 86 | ICMP flow with abnormal size
 87 | ----------------------------
 88 | 
 89 | ICMP flows are usually short as they are mostly used to check connectivity from one point
 90 | in the network to another. Being short, the number of bytes exchanged is usually small.
 91 | Some data exfiltration techniques abuse ICMP to send the data out without being noticed.
 92 | But in a lot of cases, these techniques send their data on the same flow. So looking
 93 | at flows that show a significant data transfer (like 150 kB for example) is interesting.
 94 | 
 95 | Using Lucene syntax, it can be written in Kibana as follows:
 96 | 
 97 | .. code-block::
 98 | 
 99 |   event_type:flow AND \\
100 |     (proto:"ICMP" OR proto:"IPv6-ICMP") AND \\
101 |     (flow.bytes_toclient:>150000 OR flow.bytes_toserver:>150000 ) AND \\
102 |     flow.bytes_toclient:>0 AND flow.bytes_toserver:>0
103 | 
104 | In Splunk, one can use:
105 | 
106 | .. code-block::
107 | 
108 |   event_type=flow AND
109 |    (proto="ICMP" OR proto="IPv6-ICMP") AND
110 |    (flow.bytes_toclient>150000 OR flow.bytes_toserver>150000) AND
111 |    flow.bytes_toclient>0 AND flow.bytes_toserver>0
112 | 
113 | High volume DNS flow
114 | --------------------
115 | 
116 | Similar to the previous example, DNS can also be used for data exfiltration and
117 | a potential consequence of the technique used is the existence of DNS flow where
118 | a big amount of data has been transferred.
119 | 
120 | Using Lucene syntax, it can be written in Kibana as follows:
121 | 
122 | .. code-block::
123 | 
124 |   event_type:flow AND app_proto:dns AND \\
125 |     (flow.bytes_toclient:>5000 OR flow.bytes_toserver:>5000)
126 | 
127 | Potential ICMP evasion
128 | ----------------------
129 | 
130 | In a standard environment, the reply to an ICMP query is an ICMP response
131 | that contains the same data. As a result, the sizes of the data in the direction
132 | of the client and in the direction of the server are equal. A flow where these two sizes differ is therefore worth investigating.
133 | 
134 | Using Splunk, matching events can be obtained by doing:
135 | 
136 | .. code-block::
137 | 
138 |   event_type=flow AND proto=icmp | where 'flow.bytes_toclient'!='flow.bytes_toserver'
139 | 
140 | Using Kibana, it is possible to do the same in 2 steps. First define a Query DSL as follows:
141 | 
142 | .. code-block:: JSON
143 | 
144 |   {
145 |     "query": {
146 |       "bool": {
147 |         "filter": {
148 |           "script": {
149 |             "script": {
150 |               "lang": "painless",
151 |               "source": "doc['flow.bytes_toclient'].value!=doc['flow.bytes_toserver'].value"
152 |             }
153 |           }
154 |         }
155 |       }
156 |     }
157 |   }
158 | 
159 | See :numref:`query-dsl` for help on adding this Query DSL filter in the Discover window:
160 | 
161 | .. _query-dsl:
162 | .. figure:: img/query-dsl.png
163 |    :scale: 30%
164 |   
165 |    Editing a Query DSL filter in Kibana.
166 | 
167 | Then, the following filter can be added to select the ICMP messages:
168 | 
169 | .. code-block::
170 | 
171 |   event_type:flow AND proto:icmp
172 | 


--------------------------------------------------------------------------------
/source/foreword.rst:
--------------------------------------------------------------------------------
 1 | Preface
 2 | =======
 3 | 
 4 | We are pleased to present the industry’s first open-source book on the world’s most popular open-source network security engine, Suricata. The idea for this book emerged after it became obvious to us that many security practitioners using Suricata either struggle to effectively use the most powerful capabilities of the tool or simply don’t realize they exist. 
 5 | 
 6 | Each year, we speak at many industry conferences and train hundreds of users in workshops on behalf of the Open Information Security Foundation (OISF). In our engagements with the audience at these events, we have noticed that users share the common perception that Suricata is a classic signature-based intrusion detection system (IDS), albeit a powerful and high-performance one. Most fail to realize that the Suricata engine can also simultaneously produce protocol transaction logs and flow records that are correlated with the IDS alerts. These can be incredibly powerful for security analysts during an incident investigation or a threat hunt. And they can be even more powerful for the development of anomaly detection using Suricata.
 7 | 
 8 | So we decided to write a simple book to introduce the most powerful features and concepts developed in Suricata over its 12-year history. 
 9 | 
10 | As you may be aware, there is a dedicated team of Suricata developers continuously working to improve Suricata and releasing new capabilities regularly. So, we decided to take a more open-source software development approach to the content and release cadence of the book in order to keep it relevant and up-to-date. 
11 | 
12 | The book is structured as a loose collection of chapters, each focused on a single subject area, such as Suricata rule writing or TLS detection and threat hunting. All its content is developed and managed on a `GitHub repository <https://github.com/StamusNetworks/suricata-4-analysts>`_ and is open to all who wish to comment or contribute ideas. Readers who are looking for a simple text edition may access all content there. The book is also available for download in PDF and eReader format on the `Stamus Networks website <https://www.stamus-networks.com/suricata-4-analysts>`_.
13 | 
14 | The open-source format makes it a living book that will grow and evolve over time with ongoing input from the authors as well as contributions and feedback from the Suricata community. 
15 | 
16 | We would like to thank everyone at Stamus Networks for their support during the making of this book. And this book would not have been possible without the help of the amazing team at OISF.
17 | 
18 | .. note::
19 | 
20 |    This book is not meant to act as a replacement for the Suricata manual, which is an excellent reference tool for those installing and deploying the platform. Instead, The Security Analyst’s Guide to Suricata was written for the SOC analysts and threat hunters who have been tasked with effectively defending their network using Suricata. We aim to provide vital information on entry points and in-depth coverage for the most important Suricata features.
21 | 
22 | We welcome your feedback. Enjoy.
23 | 


--------------------------------------------------------------------------------
/source/generic-hunting.rst:
--------------------------------------------------------------------------------
 1 | Protocol independent threat hunting
 2 | ===================================
 3 | 
 4 | 
 5 | Threat Hunting with IDS and NSM data
 6 | ------------------------------------
 7 | 
 8 | Suricata is both an IDS and an NSM tool. It will extract and generate protocol transaction logs independently of alerts. As a result, the threat hunter has the responsibility of finding the types of events where searching for results on the data makes the most sense. 
 9 | 
10 | Let's take two examples of an Indicator Of Compromise (IOC):
11 | 
12 | - an SMB user name created by a threat actor when he has taken control of an Active Directory. Let's say this username is 'pandabear'.
13 | - a domain that is an uncommonly used cloud provider. Let's say the domain is 'sovereigncloud.eu'
14 | 
15 | What these IOCs have in common is that the first thing the threat hunter must do is query the NSM data to see if the IOC has been seen in the network.
16 | 
17 | For 'pandabear', we can do two queries (using Splunk syntax), one to match in the SMB logs and the other one in the Kerberos logs:
18 | 
19 |  - `event_type=smb AND smb.ntlmssp.user=pandabear`
20 |  - `event_type=krb5 AND krb5.cname=pandabear`
21 | 
22 | For the domain, we can do queries on DNS (looking for the query), TLS (looking for the Server Name Indication), and HTTP (looking for the hostname):
23 | 
24 |  - `event_type=dns AND dns.query.rrname=sovereigncloud.eu`
25 |  - `event_type=tls AND tls.sni=sovereigncloud.eu`
26 |  - `event_type=http AND http.host=sovereigncloud.eu`
27 | 
28 | In the first example, we are really in trouble if the IOC is seen in the organization because the stage of the compromise is advanced, and because 'pandabear' is not likely a regular user. In the second, seeing the IOC is just an indicator because we can have users of this cloud provider, causing a need to discriminate among them further by doing more investigation.
29 | 
30 | For the domain, a regular check of the NSM data may be enough. For the username, on the other hand, we may want to make the switch to incident response much faster. Adding IDS signatures to detect this username if ever it appears may be a good solution: ::
31 | 
32 |  alert smb any any -> $HOME_NET any (msg:"pandabear"; smb.ntlmssp_user; content:"pandabear"; ...
33 |  alert krb5 any any -> $HOME_NET any (msg:"pandabear"; krb5.cname; content:"pandabear"; ...
34 | 
35 | Please note that the first signature will require Suricata 7.0 and that dataset is a far better way to match IOCs with Suricata signatures.
36 | 
37 | To summarize this example, because Suricata is both an IDS and an NSM, there are multiple complementary approach options when threat hunting with Suricata.
38 | 
39 | 
40 | Correlation using flow_id
41 | -------------------------
42 | 
43 | Suricata performs flow tracking over most TCP/IP protocols. In the case of TCP, this is a direct mapping of flows to TCP sessions. For UDP, this is completed by looking at the IP information (source IP and port, destination IP and port) and applying a timeout logic.
44 | 
45 | So a flow tracks what is happening during a communication between a client and a server:
46 | 
47 | .. index:: flow_id
48 | 
49 | All IP events contain a 'flow_id' key that is the same for all events in a single flow. This allows a user to group all events of a flow together.
50 | 
51 | An example seen in jq on a simple HTTP request. You can also see here that jq is used to reformat the events: ::
52 | 
53 |   jq 'select(.flow_id==1541199918082444)|{"time": .timestamp, "type": .event_type, "src_ip":.src_ip, "src_port": .src_port, "dest_ip": .dest_ip, "dest_port": .dest_port}' -c eve.json
54 |   {"time":"2017-07-24T15:54:12.716673+0200","type":"http","src_ip":"10.7.24.101","src_port":49163,"dest_ip":"216.239.38.21","dest_port":80}
55 |   {"time":"2017-07-24T15:56:28.177134+0200","type":"fileinfo","src_ip":"216.239.38.21","src_port":80,"dest_ip":"10.7.24.101","dest_port":49163}
56 |   {"time":"2017-07-24T16:15:05.777324+0200","type":"flow","src_ip":"10.7.24.101","src_port":49163,"dest_ip":"216.239.38.21","dest_port":80}
57 | 
58 | We have three events here:
59 | 
60 |  - an HTTP request
61 |  - a file information event (analysis of the data of the transferred file)
62 |  - a flow entry containing the packets and bytes accounting as well as the duration of the flow
63 | 
64 | The flow event is generated once the flow is timed out by Suricata.
65 | 
66 | Some flows can have a lot more events if the protocol (like SMB) is doing a lot of transactions on a single flow.
67 |  
68 | 
69 | Learning datasets
70 | -----------------
71 | 
72 | At first look, the `dataset <https://suricata.readthedocs.io/en/latest/rules/datasets.html>`_ feature belongs to the IDS world (see :ref:`dataset-ioc` for example) as it provides matching on a list of elements. But 'dataset' can be enriched from the packet path, meaning it can be used to store the never-before-seen metadata.
73 | 
74 | For example, to collect all internal HTTP user agents: ::
75 | 
76 |   alert http $HOME_NET any -> any any (msg:"new agent"; http.user_agent; \\
77 |     dataset:set,http-ua,type string, state /var/lib/http-ua.lst; sid:1; rev:1;)
78 | 
79 | Every time Suricata detects an HTTP user agent that it has never seen on the network before, it will trigger an alert. These events can be used to build a list of previously unseen items for all the fields that can be matched with a sticky buffer.
80 | 
81 | In our signature, the file '/var/lib/http-ua.lst' is used to store the state. Suricata will dump the contents of the list it built in memory to the file (in our case, as a base64 string). This ensures that no new events will be generated if Suricata is forced to restart.
82 | 


--------------------------------------------------------------------------------
/source/http-threat-hunting.rst:
--------------------------------------------------------------------------------
  1 | =================================
  2 | HTTP detection and threat hunting
  3 | =================================
  4 | 
  5 | Introduction
  6 | ============
  7 | 
  8 | HTTP is running the world. It is used as a result of human actions, either directly or, in the case of the HTTPS protocol, on top of TLS.
  9 | It is also widely used by systems via REST API and other inter-server communications.
 10 | 
 11 | One of the great benefits of HTTP is the weak message structure which makes it easy to develop a client.
 12 | It is a loose text-based protocol and as such looks very similar to free text. This makes it highly adaptable, but from
 13 | a security point of view this complicates things. Hunting something that has multiple forms can be rather complex. 
 14 | 
 15 | 
 16 | Protocol overview
 17 | =================
 18 | 
 19 | In HTTP, the client is the first to send data via an HTTP request. This message contains headers with
 20 | a few mandatory fields and a lot of optional headers which give more context to the server about the request
 21 | so it can adapt its answer. The request contains an optional body.
 22 | The server responds with an answer that has the same structure with headers and a body. This is because HTTP is focused on getting
 23 | information from the server.
 24 | 
 25 | To see what the minimum requirements of a request are, let's look at this minimal request to Google
 26 | done via `netcat <https://en.wikipedia.org/wiki/Netcat>`_ where we ask for the home page ``/`` with protocol version ``1.1``:
 27 | 
 28 | .. code-block::
 29 | 
 30 |   # nc -v google.fr 80
 31 |   GET / HTTP/1.1
 32 | 
 33 | This is the answer from Google:
 34 | 
 35 | .. code-block::
 36 | 
 37 |   HTTP/1.1 200 OK
 38 |   Date: Sun, 25 Sep 2022 21:17:08 GMT
 39 |   Expires: -1
 40 |   Cache-Control: private, max-age=0
 41 |   Content-Type: text/html; charset=ISO-8859-1
 42 |   Server: gws
 43 |   X-XSS-Protection: 0
 44 |   X-Frame-Options: SAMEORIGIN
 45 |   Set-Cookie: AEC=AakniGO859M8HPupnneVpexM15eeWdGOBL_LX5TGiy5GsqI_Fnm0F8UEIg; expires=Fri, 24-Mar-2023 21:17:08 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=lax
 46 |   Accept-Ranges: none
 47 |   Vary: Accept-Encoding
 48 |   Transfer-Encoding: chunked
 49 | 
 50 |   5acf
 51 |   <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" ...
 52 | 
 53 | This answer has the typical structure of an HTTP message with status line (here ``HTTP/1.1 200 OK``), followed by the headers (key and value),
 54 | then an empty line that is followed by the body.
 55 | 
 56 | This dissymmetry between the request and the response in this example emphasizes one of the main concepts of HTTP design: it should work even if client implementation is really poor.
 57 | 
 58 | If we look at the same HTTP request to google.fr done via Firefox, we have the following request:
 59 | 
 60 | .. code-block::
 61 | 
 62 |   GET / HTTP/1.1
 63 |   Host: google.fr
 64 |   User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0
 65 |   Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
 66 |   Accept-Language: en-US,en;q=0.5
 67 |   Accept-Encoding: gzip, deflate
 68 |   Connection: keep-alive
 69 |   Upgrade-Insecure-Requests: 1
 70 | 
 71 | The headers list is far longer and gives more information about what the client
 72 | is able to support or what it wants from the server's answer. For example, here, because
 73 | we have the header ``Upgrade-Insecure-Requests`` set to 1, we don't get the web page
 74 | content as we got in the previous request but we have a redirection to the Secure
 75 | HTTPS version of google.fr:
 76 | 
 77 | .. code-block::
 78 | 
 79 |   HTTP/1.1 301 Moved Permanently
 80 |   Location: http://www.google.fr/
 81 |   Content-Type: text/html; charset=UTF-8
 82 |   Date: Sun, 25 Sep 2022 21:33:01 GMT
 83 |   Expires: Tue, 25 Oct 2022 21:33:01 GMT
 84 |   Cache-Control: public, max-age=2592000
 85 |   Server: gws
 86 |   Content-Length: 218
 87 |   X-XSS-Protection: 0
 88 |   X-Frame-Options: SAMEORIGIN
 89 | 
 90 | As we will see later, the fact that a lot of freedom is given in the protocol
 91 | is a key point in profiling non-regular behavior that does not follow the implicit norm. 
 92 | 
 93 | 
 94 | HTTP analysis in Suricata
 95 | =========================
 96 | 
 97 | Suricata has very robust support for HTTP. The development of the parser was initiated at the beginning of the project
 98 | and has continued to evolve with continuing update releases.
 99 | 
100 | HTTP request and response are logged in a single event:
101 | 
102 | .. code-block:: JSON
103 | 
104 |   {
105 |     "timestamp": "2019-07-05T22:06:30.877497+0200",
106 |     "flow_id": 1831154258612572,
107 |     "pcap_cnt": 47339,
108 |     "event_type": "http",
109 |     "src_ip": "10.7.5.5",
110 |     "src_port": 62152,
111 |     "dest_ip": "198.12.71.157",
112 |     "dest_port": 443,
113 |     "proto": "TCP",
114 |     "pkt_src": "wire/pcap",
115 |     "tx_id": 0,
116 |     "http": {
117 |       "hostname": "198.12.71.157",
118 |       "http_port": 443,
119 |       "url": "/login/process.php",
120 |       "http_user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
121 |       "http_content_type": "text/html",
122 |       "http_method": "GET",
123 |       "protocol": "HTTP/1.1",
124 |       "status": 200,
125 |       "length": 173
126 |     }
127 |   }
128 | 
129 | The ``http`` object contains all the information about the request and the response. Fields like ``hostname`` or
130 | ``http_user_agent`` are coming from the client and fields such as ``status``, ``length``, or ``http_content_type``
131 | are coming from the server. The log also includes the ``tx_id`` which stands for transaction identifier. It
132 | gives the number of HTTP transactions (request + response) seen on the flow at the moment of the request.
133 | In this example it is 0, which means this is the first one.
134 | 
135 | As you can see, the event shown here does not contain all the headers. The dump of all headers can be activated
136 | in the configuration via the ``dump-all-headers`` configuration in the HTTP logging. This will provide far more
137 | information, but it is also going to be far more verbose:
138 | 
139 | .. code-block:: JSON
140 | 
141 |     "request_headers": [
142 |       {
143 |         "name": "Cookie",
144 |         "value": "session=okmKYUc4i80CZ2Rflxy91qtVJoI="
145 |       },
146 |       {
147 |         "name": "User-Agent",
148 |         "value": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
149 |       },
150 |       {
151 |         "name": "Host",
152 |         "value": "198.12.71.157:443"
153 |       },
154 |       {
155 |         "name": "Connection",
156 |         "value": "Keep-Alive"
157 |       }
158 |     ],
159 |     "response_headers": [
160 |       {
161 |         "name": "Content-Type",
162 |         "value": "text/html; charset=utf-8"
163 |       },
164 |       {
165 |         "name": "Content-Length",
166 |         "value": "173"
167 |       },
168 |       {
169 |         "name": "Cache-Control",
170 |         "value": "no-cache, no-store, must-revalidate"
171 |       },
172 |       {
173 |         "name": "Pragma",
174 |         "value": "no-cache"
175 |       },
176 |       {
177 |         "name": "Expires",
178 |         "value": "0"
179 |       },
180 |       {
181 |         "name": "Server",
182 |         "value": "Microsoft-IIS/7.5"
183 |       },
184 |       {
185 |         "name": "Date",
186 |         "value": "Fri, 05 Jul 2019 20:06:30 GMT"
187 |       }
188 |     ]
189 | 
190 | Another interesting feature of HTTP support in Suricata is the transparent decompression of the HTTP response body.
191 | If the client supports the feature, the server can return the object asked for by the client in a compressed form
192 | to downsize the transfer. The result is that the content of the HTTP body in the TCP stream is just compression noise.
193 | Suricata decompresses the data in real-time and provides the decompressed content to the keyword and layers that are using
194 | the HTTP response body.
195 | 
196 | The HTTP response body can be logged in alerts and this greatly improves the context provided as the raw TCP stream cannot be read by
197 | a human.
198 | 
199 | .. note::
200 | 
201 |   Check the `eve HTTP format <https://suricata.readthedocs.io/en/latest/output/eve/eve-json-format.html?highlight=http#event-type-http>`_ page in Suricata manual for more information on the HTTP events.
202 | 
203 | Suricata supports file extraction over HTTP, so any of the techniques and information of the :ref:`File Analysis <file-analysis>` chapter
204 | apply here.
205 | 
206 | HTTP and detection
207 | ==================
208 | 
209 | HTTP keywords
210 | -------------
211 | 
212 | Suricata has more than 25 sticky buffer keywords to match on HTTP fields, covering
213 | most of the headers and the content. These last ones are interesting, specifically 
214 | ``http.response_body`` that matches on the body of the response sent by the server. As
215 | described in the previous section, the content sent by the server can be in a compressed
216 | form and Suricata will provide the decompressed version to the detection engine.
217 | 
218 | Most keywords match on a normalized field. This is really convenient as the
219 | rules writer does not have to take the possible variant into account. For example,
220 | the ``http.host`` keyword is normalized and will always be lowercase. This prevents
221 | trivial evasion of detection by connecting to `BaDdoMAin.OrG` instead of the regular
222 | `baddomain.org`.
223 | 
224 | In some cases, the characteristic seen in the traffic is dependent on the
225 | content seen on the wire. For this reason, Suricata is providing some alternate
226 | keywords to match on the raw, unnormalized content. For example, ``http.host.raw``
227 | will match on the HTTP host in its raw form.
228 | 
229 | Cookbook
230 | --------
231 | 
232 | .. _match subdomains:
233 | 
234 | Match on a domain and its subdomains
235 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
236 | 
237 | A domain is known to be malicious and we want to alert on all requests to this domain
238 | or any of its subdomains:
239 | 
240 | .. code-block::
241 | 
242 |    alert http any any -> any any (msg:"Bad domain"; \\
243 |         http.host; dotprefix; content:".pandabear.gov"; endswith; \\
244 |         sid:1; rev:1;)
245 | 
246 | The match is obtained by using the sticky buffer ``http.host`` to
247 | match on the HTTP host sent by the client. By using ``dotprefix``, a
248 | ``.`` will be prepended to the buffer so it will not match on ``lovelypandabear.gov``.
249 | Then the signature uses the ``endswith`` keyword to ensure the string ends with the specified content.
250 | It will prevent a match on a domain like ``pandabear.governed.org``.
251 | 
252 | 
253 | Checking malicious HTTP user agent
254 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
255 | 
256 | Some variants of Trickbot are using an HTTP user agent that is set to ``test``.
257 | A signature to detect this behavior could be:
258 | 
259 | .. code-block::
260 | 
261 |    alert http any any -> any any (msg:"Bad user agent"; \\
262 |         http.user_agent; content:"test"; startswith; endswith; \\
263 |         sid:1; rev:1;)
264 | 
265 | We use the same technique as the domain with the ``endswith`` keyword
266 | that we complement with ``startswith`` to ensure full equality
267 | of the strings.
268 | 
269 | Clear text authentication and password extraction
270 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
271 | 
272 | Clear text authentication over HTTP is still relevant in some environments. 
273 | Detecting this behavior and collecting the user and password to
274 | check them against other systems to detect credential reuse is really
275 | interesting. 
276 | 
277 | This can be done with a single signature:
278 | 
279 | .. code-block::
280 | 
281 |   alert http any any -> any any (msg:"HTTP unencrypted with password"; \\
282 |        http.header; content:"Authorization|3a 20|Basic"; nocase; \\
283 |        base64_decode:bytes 0, offset 1,relative; \\
284 |        base64_data; pcre:"/([^:]+):(.+)/,flow:user,flow:password"; \\
285 |        sid:1; rev:1;)
286 | 
287 | This signature first checks for the `Authorization` header and then uses
288 | ``base64_decode`` to convert the content from base64 to regular encoding.
289 | The ``base64_data`` is a sticky buffer to access the content transformed
290 | by ``base64_decode``. In this buffer, we have the user name followed
291 | by the password so we can extract it via a regular expression using the ``pcre`` keyword.
292 | 
293 | The regular expression is really interesting as it uses the data extraction feature
294 | of Suricata:
295 | 
296 | .. code-block::
297 | 
298 |   pcre:"/([^:]+):(.+)/,flow:user,flow:password"
299 | 
300 | The regular expression has 2 groups `([^:]+)` and `(.+)`. The first
301 | one gets everything before the `:` and the second one takes the rest.
302 | So the first group retrieves the user and the second extracts the password. The magic happens
303 | in the modifiers: ``,flow:user,flow:password``. This is a Suricata extension.
304 | It is stating here that the first group should be stored in a flow variable named
305 | ``user`` and that second group should be stored in a flow variable named ``password``.
306 | 
307 | Doing this, the alert is augmented with a ``metadata`` object that contains a ``flowvars``
308 | with the extracted values as shown below:
309 | 
310 | .. code-block:: JSON
311 | 
312 |   {
313 |     "timestamp": "2022-01-07T15:13:40.947137+0100",
314 |     "flow_id": 206063044707455,
315 |     "pcap_cnt": 69,
316 |     "event_type": "alert",
317 |     "src_ip": "192.10.0.1",
318 |     "src_port": 58944,
319 |     "dest_ip": "192.10.0.2",
320 |     "dest_port": 80,
321 |     "proto": "TCP",
322 |     "metadata": {
323 |       "flowvars": [
324 |         {
325 |           "user": "regit"
326 |         },
327 |         {
328 |           "password": "ILoveSuri"
329 |         }
330 |       ]
331 |     },
332 | 
333 | 
334 | Hunting on HTTP events
335 | ======================
336 | 
337 | HTTP hunting signatures in ETOpen and ETPro
338 | -------------------------------------------
339 | 
340 | This is not a technique to hunt directly using application layer events, but the `ETOpen and ETPro ruleset <https://www.proofpoint.com/us/resources/data-sheets/etpro-versus-et-open-ruleset-comparison>`_ 
341 | contains a few hundred particularly interesting hunting signatures for the HTTP protocol. Enabling these
342 | signatures and considering them as pre-executed queries is highly recommended.
343 | 
344 | For example, the following signature matches on POST requests using an IPv4 address as hostname and missing
345 | headers that are usually sent by regular browsers.
346 | 
347 | .. code-block::
348 | 
349 |   alert http $HOME_NET any -> $EXTERNAL_NET any ( \\
350 |         msg:"ET HUNTING GENERIC SUSPICIOUS POST to Dotted Quad with Fake Browser 2"; \\
351 |         flow:established,to_server; \\
352 |         http.method; content:"POST"; \\
353 |         http.user_agent; content:"|20|Firefox/"; nocase; fast_pattern; \\
354 |         http.host; pcre:"/^(?:\d{1,3}\.){3}\d{1,3}/"; \\
355 |         http.header_names; content:"|0d 0a|Host|0d 0a|"; depth:8; \\
356 |            content:!"Accept-Encoding"; \\
357 |            content:!"Referer"; \\
358 |            content:!"X-Requested-With"; nocase; \\
359 |         classtype:bad-unknown; sid:2018359; rev:4; \\
360 |         metadata:created_at 2014_04_04, former_category INFO, updated_at 2020_08_20;)
361 | 
362 | This signature is interesting because it matches the Tactics, Techniques, and Procedures of
363 | some actors without having to know the threat.
364 | 
365 | 
366 | Rare HTTP user agents
367 | ---------------------
368 | 
369 | As HTTP is frequently seen on the network, using the rare approach is often a good way to see outliers
370 | that can be interesting to investigate.
371 | 
372 | This can be done in Splunk via the following query:
373 | 
374 | .. code-block::
375 | 
376 |   search event_type="http" | rare http.http_user_agent | sort count | head 10
377 | 
378 | 
379 | Rare HTTP hosts queried without referrer
380 | ----------------------------------------
381 | 
382 | The list of hosts used as an entry point when browsing is fairly small in most environments.
383 | Getting the rarest ones is interesting because they may expose potentially unwanted behavior such
384 | as payload download.
385 | 
386 | This can be done in Splunk via the following query:
387 | 
388 | .. code-block::
389 | 
390 |   event_type="http" AND NOT http.http_refer=* | rare http.hostname | sort count
391 | 
392 | 
393 | HTTP errors with Abnormal Content Length
394 | ----------------------------------------
395 | 
396 | Some attackers try to hide their exchanges by pretending the requests are failing. As error pages are
397 | usually fairly small, looking for error responses with an unusually large body is a good start for a hunt.
398 | 
399 | 
400 | This can be done in Splunk via the following query:
401 | 
402 | .. code-block::
403 | 
404 |   event_type="http" http.status=4* http.length>=10000 |
405 |       sort -http.length |
406 |       table src_ip, dest_ip, http.hostname, http.status, http.url, http.length
407 | 
408 | Kibana users can use the following search using Lucene syntax:
409 | 
410 | .. code-block::
411 | 
412 |    event_type:http AND http.status:>=400 AND http.status:<500 AND http.length:>=10000
413 | 


--------------------------------------------------------------------------------
/source/img/Suricata_SMB_Subobject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/Suricata_SMB_Subobject.png


--------------------------------------------------------------------------------
/source/img/Suricata_TLS_in_JSON.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/Suricata_TLS_in_JSON.png


--------------------------------------------------------------------------------
/source/img/Suricata_Timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/Suricata_Timeline.png


--------------------------------------------------------------------------------
/source/img/alert-metadata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/alert-metadata.png


--------------------------------------------------------------------------------
/source/img/alert-sig-metadata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/alert-sig-metadata.png


--------------------------------------------------------------------------------
/source/img/directionality-warning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/directionality-warning.png


--------------------------------------------------------------------------------
/source/img/missing-http.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/missing-http.png


--------------------------------------------------------------------------------
/source/img/mixed-content.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/mixed-content.png


--------------------------------------------------------------------------------
/source/img/query-dsl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/query-dsl.png


--------------------------------------------------------------------------------
/source/img/signatures-ordered.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/signatures-ordered.png


--------------------------------------------------------------------------------
/source/img/sn-3-to-1-without-negatives.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/sn-3-to-1-without-negatives.png


--------------------------------------------------------------------------------
/source/img/sn-network-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/sn-network-diagram.png


--------------------------------------------------------------------------------
/source/img/splunk-expired-tls.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/splunk-expired-tls.png


--------------------------------------------------------------------------------
/source/img/splunk-tls-cipher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/splunk-tls-cipher.png


--------------------------------------------------------------------------------
/source/img/stamus-backcover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/stamus-backcover.jpg


--------------------------------------------------------------------------------
/source/img/stamus-background.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/stamus-background.jpg


--------------------------------------------------------------------------------
/source/img/stamus-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/stamus-logo.png


--------------------------------------------------------------------------------
/source/img/stamus-title.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/stamus-title.jpg


--------------------------------------------------------------------------------
/source/img/virustotal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/virustotal.png


--------------------------------------------------------------------------------
/source/img/vscode-sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StamusNetworks/suricata-4-analysts/60e749b103db9817a56dfd232abb4b15d8dc83e2/source/img/vscode-sample.png


--------------------------------------------------------------------------------
/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. Suricata Hunting documentation master file, created by
 2 |    sphinx-quickstart on Wed Feb 17 17:27:00 2021.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | The Security Analyst’s Guide to Suricata
 7 | ========================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    foreword
14 |    intro
15 |    tools
16 |    generic-hunting
17 |    suricata-as-ids
18 |    practical-rules-writing
19 |    write-performant-rules
20 |    file-analysis
21 |    flow-hunting
22 |    tls-threat-hunting
23 |    smb-threat-hunting
24 |    http-threat-hunting
25 |    dns-threat-hunting
26 |    about
27 |    about-stamus
28 | 


--------------------------------------------------------------------------------
/source/intro.rst:
--------------------------------------------------------------------------------
 1 | Introduction to Suricata
 2 | ========================
 3 | 
 4 | .. index:: Suricata
 5 | 
 6 | An Open Source Network Threat Detection engine
 7 | ----------------------------------------------
 8 | 
 9 | Suricata is an open source intrusion detection (IDS), intrusion prevention (IPS), and network security monitoring (NSM) system. It is developed and maintained by a vast community under the guidance of the Open Information Security Foundation (OISF). The project started in 2009, and had its first official release in 2010.
10 | 
11 | The original goal of the Suricata IDS project was to develop an intrusion detection engine based on signatures, similar to its ancestor Snort but with different technological choices. The aim was to build a network IDS that would share the same detection language as Snort and have a strong focus on community. The early technology choices were to implement multi-threading, advanced HTTP support, and a port-independent protocol recognition.
12 | 
13 | Over the following dozen years or so, the project—made possible thanks to public funding—has continued to evolve. Overseeing the evolution of the platform is the `OISF <https://oisf.net/>`_, a non-profit organization created to receive the funds and take care of promoting and organizing Suricata’s growth.
14 | 
15 | Most of the funding for the project now comes through private organizations via consortium membership. The foundation and the consortium members play a significant role in advancing the technology, but Suricata development has always been and remains a community undertaking.
16 | 
17 | People outside the foundation have made revolutionary proposals, which have profoundly changed the face of the project. They have helped Suricata to evolve over the years in order to stay current, attractive, and focused on threat detection. The timeline below illustrates some of the major Suricata milestones.
18 | 
19 | .. figure:: img/Suricata_Timeline.png
20 | 
21 |    Timeline of major Suricata versions.
22 | 
23 | .. include:: 12years.rst
24 | 


--------------------------------------------------------------------------------
/source/license.rst:
--------------------------------------------------------------------------------
  1 | =======
  2 | License
  3 | =======
  4 | 
  5 | This document is licensed under the Creative Commons Attribution-ShareAlike 4.0 International license.
  6 | 
  7 | Creative Commons Attribution-ShareAlike 4.0 International
  8 | =========================================================
  9 | 
 10 | Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
 11 | 
 12 | Using Creative Commons Public Licenses
 13 | --------------------------------------
 14 | 
 15 | Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
 16 | 
 17 | - **Considerations for licensors:** Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. `More considerations for licensors <http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors>`_.
 18 | 
 19 | - **Considerations for the public:** By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. `More considerations for the public <http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees>`_.
 20 | 
 21 | Creative Commons Attribution-ShareAlike 4.0 International Public License
 22 | ========================================================================
 23 | 
 24 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
 25 | 
 26 | Section 1 – Definitions.
 27 | ------------------------
 28 | 
 29 | a. **Adapted Material** means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
 30 | 
 31 | b. **Adapter's License** means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
 32 | 
 33 | c. **BY-SA Compatible License** means a license listed at `creativecommons.org/compatiblelicenses <http://creativecommons.org/compatiblelicenses>`_, approved by Creative Commons as essentially the equivalent of this Public License.
 34 | 
 35 | d. **Copyright and Similar Rights** means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
 36 | 
 37 | e. **Effective Technological Measures** means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
 38 | 
 39 | f. **Exceptions and Limitations** means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
 40 | 
 41 | g. **License Elements** means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.
 42 | 
 43 | h. **Licensed Material** means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
 44 | 
 45 | i. **Licensed Rights** means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
 46 | 
 47 | j. **Licensor** means the individual(s) or entity(ies) granting rights under this Public License.
 48 | 
 49 | k. **Share** means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
 50 | 
 51 | l. **Sui Generis Database Rights** means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
 52 | 
 53 | m. **You** means the individual or entity exercising the Licensed Rights under this Public License. **Your** has a corresponding meaning.
 54 | 
 55 | Section 2 – Scope.
 56 | ------------------
 57 | 
 58 | a. **License grant.**
 59 | 
 60 |  1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
 61 | 
 62 |   A. reproduce and Share the Licensed Material, in whole or in part; and
 63 | 
 64 |   B. produce, reproduce, and Share Adapted Material.
 65 | 
 66 |  2. **Exceptions and Limitations.** For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
 67 | 
 68 |  3. **Term.** The term of this Public License is specified in Section 6(a).
 69 | 
 70 |  4. **Media and formats; technical modifications allowed.** The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
 71 |     
 72 |  5. **Downstream recipients.**
 73 | 
 74 |   A. **Offer from the Licensor – Licensed Material.** Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
 75 |         
 76 |   B. **Additional offer from the Licensor – Adapted Material.** Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply.
 77 | 
 78 |   C. **No downstream restrictions.** You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
 79 | 
 80 |  6. **No endorsement.** Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
 81 |     
 82 | b. **Other rights.**
 83 | 
 84 |  1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
 85 | 
 86 |  2. Patent and trademark rights are not licensed under this Public License.
 87 | 
 88 |  3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
 89 |     
 90 | Section 3 – License Conditions.
 91 | -------------------------------
 92 | 
 93 | Your exercise of the Licensed Rights is expressly made subject to the following conditions.
 94 | 
 95 | a. **Attribution.**
 96 | 
 97 |  1. If You Share the Licensed Material (including in modified form), You must:
 98 | 
 99 |   A. retain the following if it is supplied by the Licensor with the Licensed Material:
100 | 
101 |    i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
102 | 
103 |    ii. a copyright notice;
104 | 
105 |    iii. a notice that refers to this Public License;
106 | 
107 |    iv. a notice that refers to the disclaimer of warranties;
108 | 
109 |    v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
110 | 
111 |   B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
112 | 
113 |   C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
114 | 
115 |  2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
116 | 
117 |  3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
118 |     
119 | b. **ShareAlike.**
120 | 
121 | In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.
122 | 
123 | 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.        
124 | 
125 | 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.
126 | 
127 | 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.
128 | 
129 | Section 4 – Sui Generis Database Rights.
130 | ----------------------------------------
131 | 
132 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
133 | 
134 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
135 | 
136 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and
137 | 
138 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
139 | 
140 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
141 | 
142 | Section 5 – Disclaimer of Warranties and Limitation of Liability.
143 | -----------------------------------------------------------------
144 | 
145 | a. **Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.**
146 | 
147 | b. **To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.**
148 | 
149 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
150 | 
151 | Section 6 – Term and Termination.
152 | ---------------------------------
153 | 
154 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
155 | 
156 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
157 | 
158 |  1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
159 | 
160 |  2. upon express reinstatement by the Licensor.
161 | 
162 |  For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
163 | 
164 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
165 | 
166 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
167 | 
168 | Section 7 – Other Terms and Conditions.
169 | ---------------------------------------
170 | 
171 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
172 | 
173 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
174 | 
175 | Section 8 – Interpretation.
176 | ---------------------------
177 | 
178 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
179 | 
180 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
181 | 
182 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
183 | 
184 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
185 | 
186 | ``Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.``
187 | 
188 | ``Creative Commons may be contacted at creativecommons.org.``
189 | 


--------------------------------------------------------------------------------
/source/practical-rules-writing.rst:
--------------------------------------------------------------------------------
  1 | Practical rule writing
  2 | =======================
  3 | 
  4 | 
  5 | Methodology
  6 | -----------
  7 | 
  8 | There are a few techniques that greatly improve the rule writing experience.
  9 | 
 10 | 
 11 | Use a PCAP file
 12 | ~~~~~~~~~~~~~~~
 13 | 
 14 | Writing a rule is an iterative process, so it is easier to write the rule using a PCAP
 15 | file that can be replayed multiple times instead of doing it on live traffic.
 16 | 
 17 | So, try to capture a PCAP trace of the behavior you want to inspect, then
 18 | you can replay it when your signature needs to be tested.
 19 | 
 20 | To replay the pcap, you can use something like (create data directory first) ::
 21 | 
 22 |  rm data/eve.json
 23 |  suricata -r ./trace.pcap -l data/
 24 |  cat data/eve.json | jq 'select(.alert.signature_id==1000000)'
 25 | 
 26 | if your signature ID is 1000000.
 27 | 
 28 | The 1000000-1999999 range is reserved for internal usage, so it is a good choice.
 29 | Contact the `Sid Allocation project <https://sidallocation.org/>`_ if you want
 30 | to publish your rules publicly.
 31 | 
 32 | 
 33 | Replay with only your rules file
 34 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 35 | 
 36 | To speed up the writing of a rule, you need tests to be fast. The -S flag is here to help.
 37 | Suricata will only load the rules in the file provided after the option. As a result, the run
 38 | will take only a few seconds instead of 30 seconds or more if Suricata needs to build a complete
 39 | detection engine.
 40 | 
 41 | With this option, the testing process becomes ::
 42 | 
 43 |  rm data/eve.json
 44 |  suricata -r ./trace.pcap -l data/ -S ./my.rules
 45 |  cat data/eve.json | jq 'select(.event_type=="alert")'
 46 | 
 47 | 
 48 | Add IP filtering in later stage
 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 50 | 
 51 | It is better to start writing a signature with 'any any -> any any' and only later add a filter like
 52 | '$HOME_NET any -> $EXTERNAL_NET any'. The source and destination IPs depend on the signature,
 53 | and HOME_NET may not be correctly defined with regard to the data in the PCAP file.
 54 | As a result, the signature might fail to match simply because of the IP filter,
 55 | and not because of a complex regular expression you added in the signature.
 56 | 
 57 | 
 58 | Writing a rule - step by step
 59 | -----------------------------
 60 | 
 61 | The following is a suggestion for a process to use when writing signatures:
 62 | 
 63 | 
 64 | Get a pcap file
 65 | ~~~~~~~~~~~~~~~
 66 | 
 67 | First step is to get a PCAP file with the content you want triggering the rule. Don't hesitate to filter out things in the pcap.
 68 | For example, if you want to match on a single flow you can do something like ::
 69 | 
 70 |  tcpdump -r input.pcap -w work.pcap port 53535 and port 443
 71 | 
 72 | where 53535 and 443 are the source and destination ports of the flow you want to match
 73 | on. You can also add a few 'host' filters in the BPF if the previous command returned
 74 | more than one flow.
 75 | 
 76 | Now we can use the file 'work.pcap' for our tests.
 77 | 
 78 | 
 79 | Run the file inside Suricata
 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 81 | 
 82 | By running Suricata without any rules on the file, we can extract all the metadata seen by Suricata: ::
 83 | 
 84 |  rm data/eve.json
 85 |  suricata -r ./trace.pcap -l data/ -S /dev/null
 86 |  # explore data/eve.json
 87 | 
 88 | In most cases, it will be good enough to get an idea of what fields we should have matched on.
 89 | As the data is coming from Suricata itself, the string will be exactly what we should use
 90 | in the signature.
 91 | 
 92 | If you need more inspection, you can use `Wireshark <https://www.wireshark.org/>`_ to do so.
 93 | You can also see Suricata data in Wireshark
 94 | by using `Suriwire <https://github.com/regit/suriwire>`_.
 95 | 
 96 | .. _write-signature:
 97 | 
 98 | 
 99 | Write your signature
100 | ~~~~~~~~~~~~~~~~~~~~
101 | 
102 | We highly recommend using a text editor supported by the :ref:`Suricata Language Server <suricata-ls>` for editing.
103 | Using the editor with the Suricata Language Server extension allows you to easily identify errors and take advantage of auto-completion. During the writing phase, it is easier to have a file containing a single signature.
104 | 
105 | We can then test if the rule is alerting by running: ::
106 | 
107 |  rm data/eve.json
108 |  suricata -r ./trace.pcap -l data/ -S my.rules -v
109 |  cat data/eve.json | jq 'select(.event_type=="alert")'
110 | 
111 | The last command may not even be necessary. This is because by adding '-v' we will have the number of alerts at the end of the output. ::
112 | 
113 |  [9093] 9/8/2022 -- 23:50:47 - (counters.c:871) <Info> (StatsLogSummary) -- Alerts: 1
114 | 
115 | If you are not using the :ref:`Suricata Language Server <suricata-ls>`, you need to do an engine analysis with Suricata
116 | to get warnings and performance hints on the signature: ::
117 | 
118 |  suricata --engine-analysis -l data/ -S my.rules -v
119 |  cat data/rules_analysis.txt
120 | 
121 | As mentioned before, the easiest way is to take an iterative approach here:
122 | 
123 | - Start with a simple content match on one of the sticky buffer keywords
124 | - Add more content matches if needed
125 | - Complete with a regular expression if needed
126 | - Set up the variable for the IPs (HOME_NET, EXTERNAL_NET for example)
127 | - Add the metadata keyword for more usable data
128 | 
129 | Between each step, run Suricata to verify that your output is correct.
130 | 
131 | See the chapter :ref:`Write performant Suricata rules <performant-rules>` for more details and explanation on the steps described
132 | above and especially the :ref:`Performance improvement process <rules-perfomance-improvement>` section.
133 | 


--------------------------------------------------------------------------------
/source/smb-threat-hunting.rst:
--------------------------------------------------------------------------------
  1 | =================================
  2 | SMB detection and threat hunting
  3 | =================================
  4 | 
  5 | Introduction
  6 | ============
  7 | 
  8 | SMB (Server Message Block) is a client-server communication protocol that has many implementations and is primarily used for sharing access to files, printers, and resources on the network. The variant used on Microsoft Windows networks is known as the Microsoft SMB Protocol. Other systems and OS types like Linux and Mac also include support for SMB.
  9 | 
 10 | There are many versions and history revisions
 11 | 
 12 | - SMB 1.0
 13 | - CIFS
 14 | - SMB 2.0
 15 | - SMB 2.1
 16 | - SMB 3.0
 17 | - SMB 3.0.2
 18 | - SMB 3.1.1
 19 | 
 20 | as well as third party implementations
 21 | 
 22 | - Samba
 23 | - Netsmb
 24 | - NQ
 25 | - MoSMB
 26 | - Fusion File Share by Tuxera
 27 | - Likewise
 28 | 
 29 | 
 30 | The implementation and the central internal usage of the protocol by many types of operating systems makes it an ideal medium to be used by threat actors for internal/lateral movement. Once a foothold is established, the actor can utilize built-in and default available functionalities.
 31 | 
 32 | 
 33 | Protocol overview
 34 | =================
 35 | 
 36 | SMB Protocol functionality can also include the following
 37 | 
 38 | - Dialect negotiation
 39 | - Determining other Microsoft SMB Protocol servers on the network, or network browsing
 40 | - Printing over a network
 41 | - File, directory, and share access authentication
 42 | - File and record locking
 43 | - File and directory change notification
 44 | - Extended file attribute handling
 45 | - Unicode support
 46 | 
 47 | which makes it even more interesting and potent in terms of network visibility and monitoring.
 48 | 
 49 | 
 50 | SMB analysis in Suricata
 51 | =========================
 52 | 
 53 | Suricata supports protocol analysis and logging of all SMB versions like SMB 1.x, SMB 2.x and SMB 3.x.
 54 | Since Suricata 6, SMB has been further improved thanks to community feedback and code donation.
 55 | 
 56 | .. code-block:: JSON
 57 | 
 58 |   {
 59 |     "timestamp": "2022-05-04T18:51:26.052278+0300",
 60 |     "flow_id": 1941808952834204,
 61 |     "pcap_cnt": 1189,
 62 |     "event_type": "smb",
 63 |     "src_ip": "10.136.0.69",
 64 |     "src_port": 49622,
 65 |     "dest_ip": "10.136.0.64",
 66 |     "dest_port": 445,
 67 |     "proto": "TCP",
 68 |     "pkt_src": "wire/pcap",
 69 |     "metadata": {
 70 |       "flowbits": [
 71 |         "ET.smbdcerpc.endians"
 72 |       ]
 73 |     },
 74 |     "smb": {
 75 |       "id": 85,
 76 |       "dialect": "3.11",
 77 |       "command": "SMB2_COMMAND_CREATE",
 78 |       "status": "STATUS_SUCCESS",
 79 |       "status_code": "0x0",
 80 |       "session_id": 52777564766265,
 81 |       "tree_id": 9,
 82 |       "filename": "PSEXESVC.exe",
 83 |       "disposition": "FILE_OPEN",
 84 |       "access": "normal",
 85 |       "created": 1651679428,
 86 |       "accessed": 1651679428,
 87 |       "modified": 1651679428,
 88 |       "changed": 1651679428,
 89 |       "size": 383872,
 90 |       "fuid": "000002a0-000c-0000-0021-00000000000c"
 91 |     }
 92 |   }
 93 | 
 94 | The ``smb`` object contains all the information about the specific SMB transaction. The ``smb`` object can be found in both ``"event_type":"alert"`` as supplemental metadata and as a stand alone SMB protocol log (``"event_type":"smb"``). It has detailed ``key:value`` field pairs giving information about the transaction. In the example above, ``filename`` is the name of the file accessed or transferred, ``disposition`` is instructing the action the server must take if the file already exists, ``command`` is containing the actual SMB command, and ``status`` has the return status of the command.
 95 | 
 96 | .. code-block:: JSON
 97 | 
 98 |   "smb": {
 99 |     "id": 3,
100 |     "dialect": "3.11",
101 |     "command": "SMB2_COMMAND_SESSION_SETUP",
102 |     "status": "STATUS_SUCCESS",
103 |     "status_code": "0x0",
104 |     "session_id": 52777564766265,
105 |     "tree_id": 0,
106 |     "ntlmssp": {
107 |       "domain": "STCONSULT",
108 |       "user": "Administrator",
109 |       "host": "PC1"
110 |     }
111 | 
112 | Other useful information is also available depending on the different SMB transaction or request. In the example above we have information about a session setup with details about ``domain`` - the domain , ``user`` - the user establishing the session,  and the ``host`` it is established from.
113 | 
114 | .. code-block:: JSON
115 | 
116 |   "smb": {
117 |     "id": 73,
118 |     "dialect": "3.11",
119 |     "command": "SMB2_COMMAND_WRITE",
120 |     "status": "STATUS_SUCCESS",
121 |     "status_code": "0x0",
122 |     "session_id": 52777564766265,
123 |     "tree_id": 1,
124 |     "dcerpc": {
125 |       "request": "BIND",
126 |       "response": "BINDACK",
127 |       "interfaces": [
128 |         {
129 |           "uuid": "367abb81-9844-35f1-ad32-98f038001003",
130 |           "version": "2.0",
131 |           "ack_result": 0,
132 |           "ack_reason": 0
133 |         },
134 |         {
135 |           "uuid": "367abb81-9844-35f1-ad32-98f038001003",
136 |           "version": "2.0",
137 |           "ack_result": 3,
138 |           "ack_reason": 0
139 |         }
140 |       ],
141 |       "call_id": 2
142 |     }
143 | 
144 | We can also count on Suricata to give us any specific data on top of SMB , like DCERPC and specific Microsoft protocol UUID (``uuid`` key).
145 | 
146 | .. note::
147 | 
148 |   Check the `eve SMB format <https://suricata.readthedocs.io/en/latest/output/eve/eve-json-format.html#event-type-smb>`_ page in the Suricata manual for more information on the SMB events.
149 | 
150 | 
151 | SMB and detection
152 | ==================
153 | 
154 | SMB keywords
155 | -------------
156 | 
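157 | Out of the box, Suricata supports the following keywords in rules for matching inside the SMB transactions. ``dcerpc.stub_data``, ``smb.named_pipe`` and ``smb.share`` are sticky buffers, while ``dcerpc.iface`` and ``dcerpc.opnum`` take the value to match directly as an argument: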
158 | 
159 | - dcerpc.iface: Match on the UUID of the protocol
160 | - dcerpc.opnum: Match on the opnum of the protocol
161 | - dcerpc.stub_data: Match on the stub data (data/arguments of the remote call)
162 | - smb.named_pipe: Match on SMB named pipe in tree connect
163 | - smb.share: Match on SMB share name in tree connect
164 | 
165 | These keywords can be used in rules matching. It is important to note that those keywords are separate from the protocol fields matching that can further be used in SIEM queries of the SMB protocol logs produced by Suricata.
166 | 
167 | 
168 | Hunting on SMB events
169 | ======================
170 | 
171 | SMB Scheduled task created remotely
172 | -----------------------------------
173 | 
174 | Hunting on SMB events is a big task, and to be more potent and successful it also needs infrastructure and organizational local knowledge.
175 | As an example, it might be interesting to know, highlight, and investigate when a ``Scheduled Task`` is created remotely. This is indeed a task
176 | that is definitely only done by some advanced system administrators and by some attackers.
177 | 
178 | For that we can use the following rule:
179 | 
180 | .. code-block::
181 | 
182 |   alert smb any any -> any any ( \
183 |      msg: "SN MS Scheduled task created remotely"; \
184 |      flow: to_server, established; \
185 |      dcerpc.iface:378E52B0-C0A9-11CF-822D-00AA0051E40F; dcerpc.opnum:0; \
186 |      reference:url,https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-tsch/4d44c426-fad2-4cc7-9677-bfcd235dca33; \
187 |      metadata:created_at 2022_09_20, updated_at 2022_09_20; \
188 |      target:dest_ip; \
189 |      sid:1000001; rev:1;)
190 | 
191 | The resulting alert event log could look as follows, please note the ``flow`` and ``smb`` subsections of the alert event:
192 | 
193 | .. code-block:: JSON
194 | 
195 |   {
196 |     "stream": 1,
197 |     "ether": {
198 |       "dest_mac": "ff:ff:ff:28:fe:2d",
199 |       "src_mac": "ff:ff:ff:7a:71:40"
200 |     },
201 |     "timestamp": "2022-09-27T20:04:27.911458+0200",
202 |     "dest_ip": "10.10.11.15",
203 |     "tx_id": 9,
204 |     "packet_info": {
205 |       "linktype": 1
206 |     },
207 |     "flow_id": 1056255386940814,
208 |     "flow": {
209 |       "dest_ip": "10.10.11.15",
210 |       "src_ip": "10.10.22.55",
211 |       "pkts_toserver": 17,
212 |       "pkts_toclient": 15,
213 |       "bytes_toserver": 3983,
214 |       "bytes_toclient": 3240,
215 |       "start": "2022-09-27T20:04:27.311464+0200",
216 |       "src_port": 55067,
217 |       "dest_port": 445
218 |     },
219 |     "type": "json-log",
220 |     "in_iface": "eth0",
221 |     "app_proto": "smb",
222 |     "metadata": {
223 |       "flowbits": [
224 |         "ET.smbdcerpc.endians"
225 |       ]
226 |     },
227 |     "src_ip": "10.10.22.55",
228 |     "alert": {
229 |       "metadata": {
230 |         "created_at": [
231 |           "2022_09_20"
232 |         ],
233 |         "updated_at": [
234 |           "2022_09_20"
235 |         ]
236 |       },
237 |       "rev": 1,
238 |       "source": {
239 |         "port": 55067,
240 |         "ip": "10.10.22.55"
241 |       },
242 |       "action": "allowed",
243 |       "gid": 1,
244 |       "category": "",
245 |       "severity": 3,
246 |       "target": {
247 |         "port": 445,
248 |         "ip": "10.10.11.15"
249 |       },
250 |       "signature_id": 1000001,
251 |       "lateral": "intranet",
252 |       "signature": "SN MS Scheduled task created remotely"
253 |     },
254 |     "event_type": "alert",
255 |     "@version": "1",
256 |     "input": {
257 |       "type": "log"
258 |     },
259 |     "dest_port": 445,
260 |     "@timestamp": "2022-09-27T18:04:27.911Z",
261 |     "proto": "TCP",
262 |     "src_port": 55067,
263 |     "smb": {
264 |       "id": 10,
265 |       "tree_id": 1,
266 |       "session_id": 17607151321153,
267 |       "dialect": "3.11",
268 |       "dcerpc": {
269 |         "response": "UNREPLIED",
270 |         "request": "REQUEST",
271 |         "req": {
272 |           "stub_data_size": 264,
273 |           "frag_cnt": 1
274 |         },
275 |         "call_id": 2,
276 |         "opnum": 0
277 |       },
278 |       "command": "SMB2_COMMAND_IOCTL",
279 |       "status": "STATUS_PENDING",
280 |       "status_code": "0x103"
281 |     }
282 |   }
283 | 
284 | 
285 | SMB Status Access Denied
286 | ------------------------
287 | 
288 | An access denied status in SMB can be a common occurrence when creating or connecting to a shared directory via the tree connect operation:
289 | 
290 | .. code-block:: JSON
291 | 
292 |   {
293 |     "timestamp": "2022-05-20T20:31:58.553243+0200",
294 |     "flow_id": 1047258484058895,
295 |     "event_type": "smb",
296 |     "src_ip": "10.150.1.93",
297 |     "src_port": 52092,
298 |     "dest_ip": "10.150.1.46",
299 |     "dest_port": 445,
300 |     "proto": "TCP",
301 |     "pkt_src": "wire/pcap",
302 |     "metadata": {
303 |       "flowbits": [
304 |         "ET.smbdcerpc.endians",
305 |         "ET.dcerpc.mssrvs",
306 |         "ET.smb.binary"
307 |       ]
308 |     },
309 |     "smb": {
310 |       "id": 54,
311 |       "dialect": "3.11",
312 |       "command": "SMB2_COMMAND_TREE_CONNECT",
313 |       "status": "STATUS_ACCESS_DENIED",
314 |       "status_code": "0xc0000022",
315 |       "session_id": 30786459795473,
316 |       "tree_id": 0,
317 |       "share": "\\\\WZVCDYTZUR6.GONE.LOCAL\\C$",
318 |       "share_type": "UNKNOWN"
319 |     }
320 |   }
321 | 
322 | However, what could be interesting is using the SMB protocol and flow transaction data in Suricata to detect brute forcing. The idea is to highlight all SMB flows that have many ``STATUS_ACCESS_DENIED`` command results in the same flow indicating possible brute forcing.
323 | 
324 | This could be achieved by combining 2 Suricata log fields - namely ``flow_id`` and ``smb.status``. We can use that combination as ``flow_id`` contains the Suricata native unique flow identifier which can be used to correlate events such as alerts, flows, file transactions, and protocol logs from the same flow.
325 | 
326 | 
327 | JQ command line query
328 | ~~~~~~~~~~~~~~~~~~~~~
329 | 
330 | .. code-block::
331 | 
332 |   jq 'select(.event_type=="smb" and .smb.status == "STATUS_ACCESS_DENIED")|.flow_id' /var/log/suricata/eve.json | sort | uniq -c
333 |   10 1047258484058895
334 | 
335 | The JQ query above returns a result of 10 ``STATUS_ACCESS_DENIED`` statuses in the flow whose ``flow_id`` is ``1047258484058895``.
336 | So we have 10 instances of Denied Access in the same flow which is definitely suspicious.
337 | 
338 | Kibana query
339 | ~~~~~~~~~~~~
340 | 
341 | Create a table visualisation that uses an aggregation in Kibana on the field ``flow_id`` with the following query search:
342 | 
343 | .. code-block::
344 | 
345 |   event_type:"smb" AND smb.status:"STATUS_ACCESS_DENIED"
346 | 
347 | Splunk query
348 | ~~~~~~~~~~~~
349 | 
350 | Similarly, for Splunk the query can be:
351 | 
352 | .. code-block::
353 | 
354 |   event_type=smb sourcetype="suricata:smb" smb.status=STATUS_ACCESS_DENIED |
355 |       table src_ip, dest_ip, flow_id |
356 |       stats count by src_ip,dest_ip,flow_id |
357 |       sort - count
358 | 


--------------------------------------------------------------------------------
/source/stamus.sty:
--------------------------------------------------------------------------------
 1 | \usepackage{transparent}
 2 | \usepackage{eso-pic}
 3 | \usepackage{anyfontsize}
 4 | \usepackage{setspace}
 5 | \usepackage{xcolor}
 6 | %\AddToShipoutPictureBG{\includegraphics[width=\paperwidth,height=\paperheight]{stamus-logo.png}}
 7 | \newcommand\BackgroundPic{% Title-cover picture: draws stamus-title.jpg in a page-sized box. NOTE(review): presumably passed to an eso-pic shipout hook by the LaTeX preamble -- confirm in conf.py.
 8 | \put(0,0){% anchor the box at picture coordinate (0,0)
 9 | \parbox[b][\paperheight]{\paperwidth}{% box is \paperwidth wide and \paperheight tall
10 | \vfill % paired with the \vfill below: centres the image vertically in the box
11 | \centering % centre the image horizontally
12 | \includegraphics[width=\paperwidth,height=\paperheight,% scale towards the full page size...
13 | keepaspectratio]{stamus-title.jpg}% ...while keeping the image's aspect ratio
14 | \vfill
15 | }}} 
16 | 
17 | \newcommand\BackCoverPic{% Back-cover picture: draws stamus-backcover.jpg in a page-sized box. NOTE(review): presumably passed to an eso-pic shipout hook by the LaTeX preamble -- confirm in conf.py.
18 | \put(0,0){% anchor the box at picture coordinate (0,0)
19 | \parbox[b][\paperheight]{\paperwidth}{% box is \paperwidth wide and \paperheight tall
20 | \vfill % paired with the \vfill below: centres the image vertically in the box
21 | \centering % centre the image horizontally
22 | \includegraphics[width=\paperwidth,height=\paperheight,% scale towards the full page size...
23 | keepaspectratio]{stamus-backcover.jpg}% ...while keeping the image's aspect ratio
24 | \vfill
25 | }}} 
26 | 
27 | \newcommand*\cleartoleftpage{ % Ends the current page and, if needed, adds one blank page so that what follows starts on an even-numbered page (the left-hand page in a two-sided layout).
28 | \clearpage\ifodd\c@page % after flushing, an odd page counter means one filler page is required; \c@page needs @ to be a letter, as it normally is while a package file is read
29 | \hbox{}
30 | \vspace*{\fill}
31 | \thispagestyle{empty}
32 | \newpage % ship out the filler page (empty box, fill glue, empty page style set above)
33 | \fi % page counter already even: nothing to add
34 | }
35 | 


--------------------------------------------------------------------------------
/source/suricata-as-ids.rst:
--------------------------------------------------------------------------------
  1 | IDS features
  2 | ============
  3 | 
  4 | 
  5 | Suricata rule language 
  6 | ----------------------
  7 | 
  8 | Suricata rule language is derived from Snort rule language from 2010 and it has since evolved to become a separate language sharing a common root.
  9 | 
 10 | `Suricata documentation <https://redmine.openinfosecfoundation.org/projects/suricata/wiki/Suricata_Rules>`_ is very comprehensive with regards to signature language and keywords and should be considered the ultimate reference guide.
 11 | 
 12 | .. index:: Signature
 13 | 
 14 | 
 15 | Anatomy of a signature
 16 | ----------------------
 17 | 
 18 | A signature has 3 parts:
 19 |  
 20 |  * A keyword for the action: alert, drop, pass, reject
 21 |  * IP options to indicate the characteristics of the IP flow
 22 |  * Match and information for the signature
 23 | 
 24 | Let's see an example: 
 25 | 
 26 | .. code-block::
 27 | 
 28 |  alert http any any -> any any (msg:"http"; \
 29 |    http.host; content:"suricata.io"; \
 30 |    sid:1; rev:1;)
 31 | 
 32 | Here, Suricata will generate an alert when there is a flow where the HTTP application layer has been identified and when the HTTP host in the request contains ``suricata.io``.
 33 | 
 34 | ``msg`` is the text that will be used as message in the alert event. 
 35 | 
 36 | The ``sid`` keyword is the identifier of the signature (must be unique in the ruleset) and ``rev`` is the version
 37 | of the signature.
 38 | 
 39 | Let's take a more complete example where we want the flow to be from the internal network (identified by the variable '$HOME_NET') to the outside world (identified by the variable '$EXTERNAL_NET') and with destination port ``8080``:
 40 | 
 41 | .. code-block::
 42 | 
 43 |  alert http $HOME_NET any -> $EXTERNAL_NET 8080 (msg:"http"; \
 44 |    http.host; content:"suricata.io"; \
 45 |    sid:1; rev:1;)
 46 | 
 47 | 
 48 | Suricata rule keywords
 49 | ----------------------
 50 | 
 51 | Types of keywords
 52 | ~~~~~~~~~~~~~~~~~
 53 | 
 54 | There are 3 types of matching keywords:
 55 | 
 56 |  * Sticky buffer keywords: the preferred type for performance and readability
 57 |  * Content modifier: they set the context to the previous content match
 58 |  * The keyword value: simple content match on a field
 59 | 
 60 | It is recommended to only use sticky buffer keywords in newly written rules.
 61 | 
 62 | .. index:: Sticky Buffer
 63 | 
 64 | 
 65 | Sticky buffer keywords
 66 | ~~~~~~~~~~~~~~~~~~~~~~
 67 | 
 68 | The sticky buffer keyword sets the context for the next content matches. For example:
 69 | 
 70 | .. code-block::
 71 | 
 72 |  http.host; content:"www"; content:"toto"; pcre:"/toto\.(com|org)$/"; \
 73 |  http.method; content:"GET";
 74 | 
 75 | In this case, the host field in the HTTP header is checked for ``www`` and ``toto`` (via the content keywords) and then a regular expression match is done to detect the domains. After that, the context switches to the HTTP method, where a match on GET is done.
 76 | 
 77 | .. index:: Content Modifier
 78 | 
 79 | 
 80 | Content modifiers keywords
 81 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
 82 | 
 83 | The content modifier keywords alter the context of the previous content keyword. As a result, the keywords need to be repeated. So if we want to implement the previous example we will need to have:
 84 | 
 85 | .. code-block::
 86 | 
 87 |  content:"www"; http_host; content:"toto"; http_host; pcre:"/toto\.(com|org)$/W"; \
 88 |  content:"GET"; http_method;
 89 | 
 90 | Please note that in addition to the repetition of the keyword a modifier (``W`` in this example) has been added to the regular expression match to indicate that the match has to be done on the HTTP host.
 91 | 
 92 | 
 93 | Getting keywords from Suricata
 94 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 95 | 
 96 | You can use the following commands:
 97 | 
 98 | .. code-block::
 99 | 
100 |  suricata --list-keywords
101 |  =====Supported keywords=====
102 |  - sid
103 |  - priority
104 |  - rev
105 |  - classtype
106 |  - app-layer-protocol
107 | 
108 | Information about a specific keyword can be obtained via:
109 | 
110 | .. code-block::
111 | 
112 |  suricata --list-keywords=http.host
113 |  = http.host =
114 |  Description: sticky buffer to match on the HTTP Host buffer
115 |  Features: No option,sticky buffer
116 |  Documentation: https://suricata.readthedocs.io/en/latest/rules/http-keywords.html#http-host-and-http-raw-host
117 | 
118 | And a full export of the keywords in CSV format can be generated with:
119 | 
120 | .. code-block::
121 | 
122 |  suricata --list-keywords=csv
123 |  name;description;app layer;features;documentation
124 |  sid;set rule ID;Unset;none;https://suricata.readthedocs.io/en/latest/rules/meta.html#sid-signature-id;
125 |  priority;rules with a higher priority will be examined first;Unset;none;https://suricata.readthedocs.io/en/latest/rules/meta.html#priority;
126 |  rev;set version of the rule;Unset;none;https://suricata.readthedocs.io/en/latest/rules/meta.html#rev-revision;
127 |  classtype;information about the classification of rules and alerts;Unset;none;https://suricata.readthedocs.io/en/latest/rules/meta.html#classtype;
128 | 
129 | 


--------------------------------------------------------------------------------
/source/tls-keywords.csv:
--------------------------------------------------------------------------------
 1 | Name,Description
 2 | tls.sni,sticky buffer to match specifically and only on the TLS SNI
 3 | tls.certs,sticky buffer to match the TLS certificate
 4 | tls.cert_issuer,sticky buffer to match specifically and only on the TLS cert issuer
 5 | tls.cert_subject,sticky buffer to match specifically and only on the TLS cert subject
 6 | tls.cert_serial,sticky buffer to match the TLS cert serial
 7 | tls.cert_fingerprint,sticky buffer to match on the TLS cert fingerprint
 8 | tls.version,match on TLS/SSL version
 9 | tls_cert_notbefore,match TLS certificate notBefore field
10 | tls_cert_notafter,match TLS certificate notAfter field
11 | tls_cert_expired,match expired TLS certificates
12 | tls_cert_valid,match not expired TLS certificates
13 | ja3.hash,sticky buffer to match the JA3 hash
14 | ja3.string,sticky buffer to match the JA3 string
15 | ja3s.hash,sticky buffer to match the JA3S hash
16 | ja3s.string,sticky buffer to match the JA3S string
17 | 


--------------------------------------------------------------------------------
/source/tls-threat-hunting.rst:
--------------------------------------------------------------------------------
  1 | ================================
  2 | TLS Detection and Threat Hunting
  3 | ================================
  4 | 
  5 | 
  6 | Introduction
  7 | ============
  8 | 
  9 | The TLS protocol is everywhere. The Secure Socket Layer implementation, initially developed by Netscape for its browser, has evolved into one of the most prominent standards. It is widely used in HTTPS and other communications protocols to encrypt communication. Yes, encrypt, which for network security is the equivalent of saying, "hide all the juicy details".
 10 | 
 11 | But there is still information that can be extracted or built from encrypted communication. This can be used for threat hunting as well as an IDS approach.
 12 | 
 13 | 
 14 | Protocol overview
 15 | =================
 16 | 
 17 | In all versions of TLS, the client is opening a connection to the server and then sending an initial message. It contains the client capabilities in terms of encryption. Using that, the server then replies with potential agreement on encryption technique to use, as well as its certificates. The client analyzes this message and checks that the server certificate is valid. If everything is fine, the client sends its certificates and a seed that is needed to start the encrypted exchange. The server then initiates the encryption and the session switches to encryption.
 18 | 
 19 | Before TLS 1.3, the X509 certificate was in clear text. But since TLS 1.3, it is encrypted. As a result, visibility is much more limited with TLS 1.3.
 20 | 
 21 | In most implementations, there is a TCP connection and then a TLS handshake, but in some cases the server offers a clear text and an encrypted service on the same port. In this case, a mechanism is needed on the clear text protocol to trigger the switch. In most implementations this is the ``STARTTLS`` message. Most common protocols using this are SMTP, IMAP, and FTP.
 22 | 
 23 | 
 24 | TLS analysis in Suricata
 25 | ========================
 26 | 
 27 | 
 28 | TLS handshake analysis
 29 | ----------------------
 30 | 
 31 | Suricata does not decrypt the traffic but rather performs an analysis of the TLS handshake. By doing this, it manages to extract information on the TLS characteristics as well as on the X509 certificates. This data is written in the ``tls`` event type and is also added to the ``alert`` events when available.
 32 | 
 33 | Suricata can also extract the certificate chain sent by the server and store it inside the event or as a separate file.
 34 | 
 35 | 
 36 | Extracted fields
 37 | ----------------
 38 | 
 39 | Suricata extracts information about the TLS handshake and outputs this information in ``tls`` events.
 40 | 
 41 | A typical event looks like the following:
 42 | 
 43 | .. code-block:: JSON
 44 | 
 45 |   {
 46 |     "timestamp": "2020-05-08T23:32:34.218590+0200",
 47 |     "flow_id": 1737090126716212,
 48 |     "pcap_cnt": 41441,
 49 |     "event_type": "tls",
 50 |     "src_ip": "10.0.0.128",
 51 |     "src_port": 52046,
 52 |     "dest_ip": "64.233.179.94",
 53 |     "dest_port": 443,
 54 |     "proto": "TCP",
 55 |     "tls": {
 56 |       "subject": "C=US, ST=California, L=Mountain View, O=Google LLC, CN=*.gstatic.com",
 57 |       "issuerdn": "C=US, O=Google Trust Services, CN=GTS CA 1O1",
 58 |       "serial": "74:E6:32:EA:F9:C6:35:C2:02:00:00:00:00:63:98:DD",
 59 |       "fingerprint": "f5:af:1c:45:74:1b:2e:f2:5a:85:d1:49:be:dc:97:0d:2e:0c:97:a2",
 60 |       "sni": "www.gstatic.com",
 61 |       "version": "TLS 1.2",
 62 |       "notbefore": "2020-04-15T20:24:10",
 63 |       "notafter": "2020-07-08T20:24:10"
 64 |     }
 65 |   }
 66 | 
 67 | 
 68 | Among the interesting fields, we have the ``tls.sni`` which stands for TLS Server Name Indication and is in fact the host name requested by the client. This is sent by the client in the first message to allow the server to choose which certificate to send in its answer. This way the server can honor multiple services on the same port.
 69 | 
 70 | In this case, we have the ``tls.subject`` equal to ``"C=US, ST=California, L=Mountain View, O=Google LLC, CN=*.gstatic.com"`` which means, because of the ``CN`` field, that the certificate can serve any site that matches ``*.gstatic.com``. So we have some supplementary information thanks to the TLS SNI.
 71 | 
 72 | .. note::
 73 | 
 74 |   Check the `eve TLS format <https://suricata.readthedocs.io/en/latest/output/eve/eve-json-format.html?highlight=http#event-type-tls>`_ page in Suricata manual for more information on the TLS events.
 75 | 
 76 | .. index:: TLS JA3
 77 | 
 78 | TLS JA3
 79 | -------
 80 | 
 81 | In a standard TLS handshake, little is known about the client side. This is because the client certificate is not usually familiar nor is it sent over the wire. If we compare this with HTTP, we don't have the user agent field that (even if it is a declarative field) is a valuable source of information,  allowing us to identify and classify protocol clients.
 82 | 
 83 | `JA3 <https://github.com/salesforce/ja3>`_ was created by John B. Althouse, Jeff Atkinson, and Josh Atkins (hence the name of the method) to address this issue. It is based on the fact that similar implementations will send similar negotiation parameters in the initial message. By carefully selecting some of these parameters, we can build an identifier that discriminates the implementations with a fine granularity. As with most clever techniques, this looks very simple on the surface, but it has proven to be an incredibly efficient way to fingerprint a TLS client.
 84 | 
 85 | Identifying malware traffic with JA3 has proven to be successful even if there is a non-zero false positive rate.
 86 | 
 87 | The following example is a Suricata TLS event with JA3 activated:
 88 | 
 89 | .. code-block:: JSON
 90 | 
 91 |   {
 92 |     "timestamp": "2020-05-08T23:35:24.922820+0200",
 93 |     "flow_id": 995065818031171,
 94 |     "pcap_cnt": 51204,
 95 |     "event_type": "tls",
 96 |     "src_ip": "10.0.0.128",
 97 |     "src_port": 52047,
 98 |     "dest_ip": "144.91.76.208",
 99 |     "dest_port": 443,
100 |     "proto": "TCP",
101 |     "tls": {
102 |       "subject": "C=GB, ST=London, L=London, O=Global Security, OU=IT Department, CN=example.com",
103 |       "issuerdn": "C=GB, ST=London, L=London, O=Global Security, OU=IT Department, CN=example.com",
104 |       "serial": "00:9C:FC:DA:1D:A4:70:87:5D",
105 |       "fingerprint": "b8:18:2d:cb:c9:f8:1a:66:75:13:18:31:24:e0:92:35:42:ab:96:89",
106 |       "version": "TLSv1",
107 |       "notbefore": "2020-05-03T11:07:28",
108 |       "notafter": "2021-05-03T11:07:28",
109 |       "ja3": {
110 |         "hash": "6734f37431670b3ab4292b8f60f29984",
111 |         "string": "769,47-53-5-10-49171-49172-49161-49162-50-56-19-4,65281-10-11,23-24,0"
112 |       },
113 |       "ja3s": {
114 |         "hash": "623de93db17d313345d7ea481e7443cf",
115 |         "string": "769,49172,65281-11"
116 |       }
117 |     }
118 |   }
119 | 
120 | The ja3 part is the following:
121 | 
122 | .. code-block:: JSON
123 | 
124 |   {
125 |     "ja3": {
126 |       "hash": "6734f37431670b3ab4292b8f60f29984",
127 |       "string": "769,47-53-5-10-49171-49172-49161-49162-50-56-19-4,65281-10-11,23-24,0"
128 |     }
129 |   }
130 | 
131 | It is composed of 2 fields: a string that is built by concatenating a predefined list of negotiation parameters and a hash value that is simply the md5 hash of the string.
132 | 
133 | This hash has been linked to `Trickbot <https://twitter.com/4a4133/status/1043246635239854081?lang=en>`_ by John B. Althouse. So just using this information is enough to identify a potential malware. Even if the server infrastructure is composed of multiple services and evolves, the JA3 of the client will stay the same as the data is based on the client's first message that can not be influenced by the server.
134 | 
135 | .. index:: TLS JA3S
136 | 
137 | TLS JA3s
138 | --------
139 | 
140 | "JA3 for servers" is almost enough to define what JA3s is. It is a technique similar to JA3 that is used to fingerprint the TLS implementation of a server. By analyzing the first message from the server, a predefined list of parameters is concatenated and an md5 hash is built. This leads to the following result in our previous entry:
141 | 
142 | .. code-block:: JSON
143 | 
144 |   {
145 |     "ja3s": {
146 |       "hash": "623de93db17d313345d7ea481e7443cf",
147 |       "string": "769,49172,65281-11"
148 |     }
149 |   }
150 | 
151 | But there is a big difference between JA3 and JA3s. Because the first message from the server is an answer to the client to continue the negotiation, the server message is dependent on the client. As a result, the JA3s is in fact an identifier of a client and server connection more than a server identification. To be fully explicit, two different clients connecting to a server will result in two different JA3s values.
152 | 
153 | 
154 | TLS and Detection
155 | =================
156 | 
157 | TLS keywords
158 | ------------
159 | 
160 | As usual, it is recommended to use all sticky buffers variants as they offer greater flexibility and better performance.
161 | 
162 | There are two classes of keywords: the one matching the TLS certificate information and the one matching on ja3 and ja3s data.
163 | 
164 | .. csv-table::
165 |   :file: tls-keywords.csv
166 |   :header-rows: 1
167 | 
168 | Extensive documentation and syntax explanation is available in Suricata documentation in the `TLS keywords page <https://suricata.readthedocs.io/en/latest/rules/tls-keywords.html>`_.
169 | 
170 | Cookbook
171 | --------
172 | 
173 | 
174 | Detecting expired certificates
175 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 | 
177 | 
178 | Let's get an alert when one of the servers we monitor has an expired certificate:
179 | 
180 | .. code-block::
181 | 
182 |   alert tls $SERVERS any -> any any (msg:"Expired certs on server"; \\
183 |        tls_cert_expired; \\
184 |        sid:1; rev:1;)
185 | 
186 | Here, we simply use the `tls_cert_expired` keyword and the `$SERVERS` variable that needs to be placed on the left because the certificate data we want to check is coming from the servers.
187 | 
188 | 
189 | Checking that internal PKI is used
190 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
191 | 
192 | The company we work for is running an expensive Public Key Infrastructure (PKI) and we want to be sure it is used for all the services running on our servers. If the TLS issuer of our PKI is `C=US, O=My Company`, we can simply use the following signature that leverages the `tls.cert_issuer`
193 | sticky buffer keyword.
194 | 
195 | .. code-block::
196 | 
197 |   alert tls $SERVERS any -> any any (msg:"Non Company PKI on server"; \\
198 |        tls.cert_issuer; content:!"C=US, O=My Company"; \\
199 |        sid:2; rev:1;)
200 | 
201 | We use an `!` on the content keyword to negate the match.
202 | 
203 | If we need to deal with historical data, we can just trigger an alert for certificates where the beginning of validity is after the date when
204 | the PKI is supposed to be implemented everywhere:
205 | 
206 | .. code-block::
207 | 
208 |   alert tls $SERVERS any -> any any (msg:"Non Company PKI on server"; \\
209 |        tls.cert_issuer; content:!"C=US, O=My Company"; \\
210 |        tls_cert_notbefore:>2021-04-01; \\
211 |        sid:2; rev:1;)
212 | 
213 | 
214 | Checking Tactics, Techniques and Procedures on certificate building
215 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216 | 
217 | Correctly creating TLS certificates is not necessarily a trivial task for either a threat hunter or attacker. For example, some Ursnif campaigns have been using certificates where the subject DN was of the form `C=XX, ST=1, L=1, O=1, OU=1, CN=*`. This `XX` and `1` are not something expected in regular certificates and it is a mark of the Tactics, Techniques, and Procedures (TTP) of the attacker.
218 | 
219 | This is something we can detect with a signature:
220 | 
221 | .. code-block::
222 | 
223 |   alert tls $EXTERNAL_NET any -> $HOME_NET any (msg:"Ursnif like certificate"; \\
224 |        tls.cert_subject; content:"C=XX"; content:"=1,"; \\
225 |        sid:3; rev:1;)
226 | 
227 | Here, we alert when an external server is using a certificate that follows the pattern we have found in the
228 | Ursnif campaign.
229 | 
230 | 
231 | Verifying a list of known bad JA3
232 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
233 | 
234 | 
235 | .. code-block::
236 | 
237 |   alert tls $HOME_NET any -> any any (msg:"Known bad JA3 seen"; \\
238 |         ja3.hash; dataset:isset,bad-ja3, type string, load bad-ja3.lst; \\
239 |         sid:4; rev:1;)
240 | 
241 | 
242 | Here, we alert as soon as a TLS JA3 from the set of known bad JA3 is seen.
243 | 
244 | 
245 | Build the list of internally used certificate authorities
246 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
247 | 
248 | In a production environment it is useful to know which TLS certificate authorities are used internally. This can be done with Suricata by using the dataset keyword:
249 | 
250 | .. code-block::
251 | 
252 |   alert tls $HOME_NET any -> any any (msg:"New internal certificate authority"; \\
253 |         tls.cert_issuer; dataset:set,internal-issuers, type string, state internal-issuers.lst, memcap 10Mb, hashsize 100; \\
254 |         sid:5; rev:1;)
255 | 
256 | Here we alert as soon as a TLS issuer is seen coming from the internal network that has never been seen before.
257 | 
258 | 
259 | Hunting on TLS events
260 | =====================
261 | 
262 | 
263 | Self signed certificates
264 | ------------------------
265 | 
266 | Self signed certificates can be detected via signatures. See `this blog post <https://www.stamus-networks.com/blog/2015/07/24/finding-self-signed-tls-certificates-suricata-and-luajit-scripting>`_ by Stamus Networks explaining the process using a lua based signature.
267 | 
268 | This can also be done using the TLS events. If `tls.issuerdn` is equal to `tls.subject`, then we have a self signed certificate.
269 | 
270 | If you have only the EVE JSON file and access to the command line, you can use `jq` to find them:
271 | 
272 | .. code-block::
273 | 
274 |   cat eve.json | jq 'select(.event_type=="tls" and .tls.issuerdn==.tls.subject)'
275 | 
276 | In Splunk, one can simply do the following:
277 | 
278 | .. code-block::
279 | 
280 |  event_type="tls" | where 'tls.subject'=='tls.issuerdn'
281 | 
282 | If your data is in Elasticsearch you can do a search in Kibana with DSL filter:
283 | 
284 | .. code-block::
285 | 
286 |   {
287 |     "query": {
288 |       "bool": {
289 |         "must": {
290 |           "script": {
291 |             "script": {
292 |               "inline": "if (doc.containsKey('tls.subject.keyword') && (!doc['tls.subject.keyword'].empty)) { return (doc['tls.subject.keyword'] == doc['tls.issuerdn.keyword']) } else { return false }" 
293 |             }
294 |           }
295 |         }
296 |       }
297 |     }
298 |   }
299 | 
300 | In some cases, you may have to replace `keyword` by `raw` in your search. You can access Query DSL filter by clicking `+ Add filter` then `Edit as Query DSL`.
301 | 
302 | 
303 | Unsecure protocol
304 | -----------------
305 | 
306 | Some TLS and SSL versions are considered to be unsecure due to design flaws and known successful attacks. Therefore, it is interesting to find any connection using these weak versions so any eavesdropping can be prevented. Known unsecure versions are all SSL versions and TLS up to 1.1.
307 | 
308 | It is possible to search for this in Elasticsearch by using the following filter:
309 | 
310 | .. code-block::
311 | 
312 |   tls.version:SSL* OR tls.version:TLSv1 OR tls.version:"TLS 1.1"
313 | 
314 | In Splunk, this can be written as:
315 | 
316 | .. code-block::
317 | 
318 |   event_type=tls AND tls.version IN ("SSLv2", "SSLv3", "TLSv1", "TLS 1.1")
319 | 
320 | 
321 | Expired certificates
322 | --------------------
323 | 
324 | The simplest way to achieve that is to use the `tls_cert_expired` keyword as seen in this signature: ::
325 | 
326 |   alert tls any any -> any any (msg:"expired certs"; tls_cert_expired; sid:1; rev:1;)
327 | 
328 | But it is also possible to do this in Splunk:
329 | 
330 | .. code-block::
331 | 
332 |  event_type=tls |
333 |  eval tls_after_date = strptime('tls.notafter',"%Y-%m-%dT%H:%M:%S") |
334 |  eval event_time = strptime(timestamp,"%Y-%m-%dT%H:%M:%S.%6N%z") |
335 |  eval validity = tls_after_date - event_time |
336 |  search validity < 0 |
337 |  top tls.subject, tls.issuerdn, tls.notafter, timestamp, validity
338 | 
339 | The complex part consists of parsing the two time stamps we are interested in with `strptime` and then computing the validity. The result
340 | of the query is shown on :numref:`splunk-expired-tls`.
341 | 
342 | .. _splunk-expired-tls:
343 | 
344 | .. figure:: img/splunk-expired-tls.png
345 | 
346 |    Splunk search on expired certificates
347 | 
348 | 
349 | TLS Cipher Suite analysis
350 | -------------------------
351 | 
352 | The negotiated TLS Cipher Suites used in a network are interesting to 
353 | monitor. They contain the set of algorithms used on TLS to protect the communication.
354 | The level of security and confidentiality provided by the various algorithms varies greatly.
355 | For instance, TLS_NULL_WITH_NULL_NULL is a valid TLS cipher suite and, yes, it means that nothing
356 | is done and the data is in clear text. While this is an extreme case, some other
357 | TLS cipher suites should be avoided like the one using the RC4 algorithm.
358 | 
359 | Although this information is not directly available in Suricata TLS events, it is available
360 | as one of the TLS JA3S parameters. The second parameter of the JA3S string is
361 | the Cipher ID. This is an integer, as TLS is not sending a string over the wire. Nevertheless, this
362 | is interesting information anyway. 
363 | 
364 | We can use Splunk's extraction capabilities to get the value of the Cipher ID in a distinct field.
365 | All we need to do is to split the JA3S string and get the second element. This can be done as follows:
366 | 
367 | .. code-block::
368 | 
369 |   event_type=tls |
370 |     spath tls.ja3s.string output=ja3s_string |
371 |     eval ja3s_elt=split(ja3s_string,",") |
372 |     eval cipher_id=mvindex(ja3s_elt, 1)
373 | 
374 | 
375 | Getting from the ID to the string version of the TLS Cipher suite can then be done via a lookup table.
376 | It can be extracted from the IANA website. This mapping is available in the 
377 | `Stamus Splunk App <https://splunkbase.splunk.com/app/5262>`_ which also contains other interesting
378 | information.
379 | 
380 | The French National Cybersecurity Agency (`ANSSI <https://www.ssi.gouv.fr/>`_) has published `Security Recommendations for TLS <https://www.ssi.gouv.fr/guide/recommandations-de-securite-relatives-a-tls/>`_ where
381 | a list of recommended TLS cipher suites is defined. Their classification also contains `degraded` TLS cipher suites that are ok to use if there are no
382 | alternatives. All other TLS cipher suites should be considered as insecure. The mapping included in the Stamus Splunk App contains this information in the lookup table,
383 | so it is possible to search and do statistics on the security of the TLS cipher suite seen on the network. For example, to list all insecure 
384 | TLS connections seen on the network, one can do the following in Splunk:
385 | 
386 | .. code-block::
387 | 
388 |   event_type=tls |
389 |     spath tls.ja3s.string output=ja3s_string |
390 |     eval ja3s_elt=split(ja3s_string,",") |
391 |     eval cipher_id=mvindex(ja3s_elt, 1) |
392 |     lookup tls_cipher_mapping.csv id as cipher_id |
393 |     search cipher_security=insecure
394 | 
395 | Here we add to the previous search a call to the lookup followed by a search on the field `cipher_security` that is added by the lookup.
396 | 
397 | .. _splunk-tls-cipher:
398 | 
399 | .. figure:: img/splunk-tls-cipher.png
400 | 
401 |    TLS Cipher Suites analysis in Stamus Splunk App
402 | 
403 | Using this technique, it is possible to build searches that classify the TLS cipher suites and
404 | display the insecure ones. This is available in one of the Stamus Splunk App dashboards as shown on :numref:`splunk-tls-cipher`.
405 | 
406 | 


--------------------------------------------------------------------------------
/source/tools.rst:
--------------------------------------------------------------------------------
 1 | Suricata ecosystem
 2 | ==================
 3 | 
 4 | Some tools will be used throughout the document. They are part of the central tooling around Suricata.
 5 | 
 6 | JQ
 7 | --
 8 | 
 9 | `JQ <https://stedolan.github.io/jq/>`_ is a command line tool that allows users to format, search, and modify JSON objects.
10 | 
11 | Elastic stack
12 | -------------
13 | 
14 | The `Elastic stack <https://www.elastic.co/>`_ is a software suite that implements a distributed NoSQL database
15 | (Elasticsearch) with a visualization interface (Kibana) and a log ingestion tool (Logstash). There are other components in the stack that will not be covered here.
16 | 
17 | Some useful Kibana dashboards have been published by Stamus Networks on `Github <https://github.com/StamusNetworks/KTS7>`_.
18 | 
19 | Splunk
20 | ------
21 | 
22 | The `Splunk <https://splunk.com>`_ platform is a search, analysis, and visualization engine that features
23 | a really powerful query language.
24 | 
25 | If you are a Splunk user you may want to take a look at the `Stamus Networks app for Splunk <https://splunkbase.splunk.com/app/5262/>`_
26 | that provides ready to use dashboards and reports for Suricata and Stamus Networks users.
27 | 
28 | 
29 | .. index:: Suricata Language Server
30 | 
31 | .. _suricata-ls:
32 | 
33 | Suricata Language Server
34 | ------------------------
35 | 
36 | The Suricata Language Server is an implementation of the Language Server Protocol for Suricata signatures. It adds syntax checks and hints as well as auto-completion to your preferred editor once it is configured. Information displayed in the editor is highly valuable when writing Suricata signatures as it
37 | ensures the rules syntax is correct while providing hints about writing performant rules.
38 | 
39 | Editors that are known to support the Suricata Language Server are Neovim, Visual Studio Code, Sublime Text 3, and Kate, but any editor supporting the Language Server Protocol should also support the Suricata Language Server.
40 | 
41 | .. image:: img/vscode-sample.png
42 | 
43 | The Suricata Language Server currently supports auto-completion and advanced syntax checking. Both features use the capabilities of the Suricata deployment available on the system. This means that the list of keywords (with documentation information) and the syntax checking both come from Suricata itself. While this comes at the cost of Suricata needing to be installed on the system, it also guarantees a strict check of signatures with respect to the version of Suricata you are running. Pushing signatures to production will not return a bad surprise as the syntax has already been checked by the same engine. 
44 | 
45 | Syntax checking is completed when files are saved. A configuration test is started using Suricata, in turn providing errors to the diagnostic. Warnings and hints are also provided by using Suricata's detection engine analysis. This analysis can return warnings and hints about potential issues seen within the signatures.
46 | 
47 | You can get the `Suricata Language Server <https://github.com/StamusNetworks/suricata-language-server>`_ from GitHub.
48 | 
49 | 


--------------------------------------------------------------------------------
/source/write-performant-rules.rst:
--------------------------------------------------------------------------------
  1 | .. _performant-rules:
  2 | 
  3 | =================================
  4 | Writing performant Suricata rules
  5 | =================================
  6 | 
  7 | Suricata detection engine optimizations
  8 | =======================================
  9 | 
 10 | 
 11 | The detection engine optimization challenge
 12 | -------------------------------------------
 13 | 
 14 | In demanding enterprise environments, Suricata must operate at very high network speeds -- often between 40Gbps and 100Gbps -- with the full ETPro ruleset loaded. That ruleset is approximately 60,000 signatures, and in order to keep up with line rate, Suricata must inspect all those packets at a rate of 3,333,333 packets per second (when operating at 40Gbps).
 15 | 
 16 | So, at 40Gbps there is a budget of .000000000005 seconds per rule. And in this .005 ns per rule, Suricata must do protocol analysis, content matching, and execute regular expressions.
 17 | 
 18 | In a typical 3GHz CPU, a CPU cycle lasts about 0.33 ns. As a result, a brute force approach in the detection engine is about 2 orders of magnitude too slow, even if a test takes only a single cycle.
 19 | 
 20 | Thus, some serious optimizations are needed. Scaling via multithreading to use all cores on the system is a key point here, and Suricata does this very well. But even on a one hundred core system, it will only lead to a 100 factor improvement, and this barely reaches the bare minimum needed for the task under the unrealistic assumption that evaluating a rule takes a single cycle.
 21 | 
 22 | Running load balancing on the CPU is incredibly important, but we still cannot address the 60,000 rules. In this case, we would need to reduce the number of rules processed. Unfortunately, running fewer rules will reduce the threat coverage, so we need a better solution.
 23 | 
 24 | 
 25 | Grouping signatures
 26 | -------------------
 27 | 
 28 | This initial approach is quite simple: why should we evaluate a rule on a UDP flow if we are currently inspecting a TCP packet? By doing a protocol split, we can, in a perfect case, divide the number of signatures to evaluate by two.
 29 | 
 30 | While we are at it, we can group signatures by protocol port, group network parameters into a tree, and place groups of signatures in the leaves.
 31 | 
 32 | This is an interesting first step, but I'm sure some readers are already concerned about the fact that everything in their network is HTTP or TLS. Thus, they have only 2 used groups.
 33 | 
 34 | Something else is needed.
 35 | 
 36 | .. index:: Multi Pattern Matching
 37 | 
 38 | 
 39 | Multi pattern matching
 40 | ----------------------
 41 | 
 42 | Since we can not differentiate on the IP parameters, we need to go higher in the protocol stack to complete the task; however, an alert can match on an HTTP user agent or on file data transferred over SMB. Given the complexity of the fields we are matching on, we cannot do an implementation of the tree. 
 43 | 
 44 | So let's take one step back. In this case, we are pattern matching on one buffer (HTTP user agent, file data, etc...) and would have a wonderful
 45 | increase in performance if we could have an automatic tree built up for the patterns we are looking for
 46 | on this buffer.
 47 | 
 48 | This type of algorithm is named multi pattern matching (MPM) and the most famous implementation
 49 | is called `Aho–Corasick algorithm <https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm>`_.
 50 | 
 51 | This method allows for a really effective split of signatures.
 52 | 
 53 | First, Suricata separates the signatures by IP parameters. Then, it looks at the fast pattern buffer (which has been selected for use with the multi pattern algorithm). There can be only one buffer in order to guarantee a perfect partition of the ruleset. Once the MPM algorithm has returned, there
 54 | will be only a small subset of signatures to evaluate. Ideally, if the pattern is well chosen, Suricata could have just a single signature to evaluate.
 55 | 
 56 | Let's use this signature as example ::
 57 | 
 58 |   alert http any any -> any any (msg:"Bad Agent"; http.user_agent; content: "Winhttp"; fast_pattern; startswith; pcre:"/^Winhttp [0-9]+\/[0-9]+/"; sid:1;)
 59 | 
 60 | The evaluation of this signature by Suricata will be as follows:
 61 | 
 62 | It will be attached to the set of signatures that have the HTTP user agent as the fast pattern buffer. As a result, the `Winhttp` content match will be evaluated during the MPM phase with all the other matches. One pass algorithm to rule them all. If there is ever a match, the signature will be fully evaluated, content will be checked (with the `startswith` modifier), and the regular expression `pcre:"/^Winhttp [0-9]+\/[0-9]+/"` will be verified. So, if `Winhttp` is an efficient differentiator among the HTTP user agent's value, Suricata might have just one signature to fully evaluate instead of the original 60000.
 63 | 
 64 | This approach allows Suricata to analyze the full ruleset in a way that is not dependent on the number of signatures. It does, however, depend on whether or not the signatures are correctly written. For example, we cannot have half of them using `Mozilla` as fast pattern on the HTTP user agent buffer because it will result in evaluating a huge number of signatures for each HTTP request since the `Mozilla` string is present in the HTTP user agent of most common browsers.
 65 | 
 66 | 
 67 | Testing performance and correctness of written rules
 68 | ====================================================
 69 | 
 70 | Suricata provides a set of tools to help users write correct rules.
 71 | 
 72 | .. index:: Engine analysis
 73 | 
 74 | 
 75 | Engine analysis
 76 | ---------------
 77 | 
 78 | Simply run the following command: ::
 79 | 
 80 |   suricata -S mynew.rules -l /tmp/analysis --engine-analysis
 81 | 
 82 | If inputted correctly, you will receive information about the syntax of the rules ::
 83 | 
 84 |   ls -l /tmp/analysis/
 85 |   total 16
 86 |   -rw-r--r-- 1 eric eric    0 Feb 17 18:58 eve.json
 87 |   -rw-r--r-- 1 eric eric    0 Feb 17 18:58 fast.log
 88 |   -rw-r--r-- 1 eric eric  733 Feb 17 18:58 rules_analysis.txt
 89 |   -rw-r--r-- 1 eric eric  643 Feb 17 18:58 rules_fast_pattern.txt
 90 |   -rw-r--r-- 1 eric eric  665 Feb 17 18:58 rules.json
 91 |   -rw-r--r-- 1 eric eric    0 Feb 17 18:58 stats.log
 92 |   -rw-r--r-- 1 eric eric 2314 Feb 17 18:58 suricata.log
 93 | 
 94 | Information is provided in the files ``rules_analysis.txt`` and ``rules_fast_pattern.txt``. In the first one, we can see a previous signature and a variant: ::
 95 | 
 96 |   -------------------------------------------------------------------
 97 |   Date: 17/2/2021 -- 19:30:28
 98 |   -------------------------------------------------------------------
 99 |   == Sid: 1 ==
100 |   alert http any any -> any any (msg:"Bad Agent"; http.user_agent; content: "Winhttp"; fast_pattern; startswith; pcre:"/^Winhttp [0-9]+\/[0-9]+/"; sid:1;)
101 |       Rule matches on http user agent buffer.
102 |       App layer protocol is http.
103 |       Rule contains 0 content options, 1 http content options, 0 pcre options, and 1 pcre options with http modifiers.
104 |       Fast Pattern "Winhttp" on "http user agent (http_user_agent)" buffer.
105 |       Warning: TCP rule without a flow or flags option.
106 |                -Consider adding flow or flags to improve performance of this rule.
107 |   
108 |   == Sid: 2 ==
109 |   alert http any any -> any any (msg:"Bad Agent, bad perf"; http.user_agent; pcre:"/^Winhttp [0-9]+\/[0-9]+/"; sid:2;)
110 |       Rule matches on http user agent buffer.
111 |       App layer protocol is http.
112 |       Rule contains 0 content options, 0 http content options, 0 pcre options, and 1 pcre options with http modifiers.
113 |       Warning: TCP rule without a flow or flags option.
114 |                -Consider adding flow or flags to improve performance of this rule.
115 | 
116 | What we see here is that the first signature has a fast pattern and missed some options on TCP flow. For the second signature, where
117 | there is just a regular expression, we can see that there is no fast pattern and that the TCP flow options are also missing. 
118 | 
119 | For the fast pattern analysis there is ::
120 | 
121 |   -------------------------------------------------------------------
122 |   Date: 17/2/2021 -- 19:30:28
123 |   -------------------------------------------------------------------
124 |   == Sid: 1 ==
125 |   alert http any any -> any any (msg:"Bad Agent"; http.user_agent; content: "Winhttp"; fast_pattern; startswith; pcre:"/^Winhttp [0-9]+\/[0-9]+/"; sid:1;)
126 |       Fast Pattern analysis:
127 |           Fast pattern matcher: http user agent (http_user_agent)
128 |           Flags: Depth
129 |           Fast pattern set: yes
130 |           Fast pattern only set: no
131 |           Fast pattern chop set: no
132 |           Original content: Winhttp
133 |           Final content: Winhttp
134 |   
135 |   == Sid: 2 ==
136 |   alert http any any -> any any (msg:"Bad Agent, bad perf"; http.user_agent; pcre:"/^Winhttp [0-9]+\/[0-9]+/"; sid:2;)
137 |       Fast Pattern analysis:
138 |           No content present
139 | 
140 | This confirms the fact that the second rule will trigger an evaluation of the regular expression for all the http requests (where there is an http user agent).
141 | 
142 | Information about the structure of the signature is also available in ``rules.json``. It is less human friendly, but follows the evolution of Suricata's detection engine more closely. For example, this output is used by the :ref:`Suricata Language Server <suricata-ls>` to build advanced analysis of the signatures file.
143 | 
144 | .. _profiling-info:
145 | 
146 | .. index:: Rules profiling
147 | 
148 | 
149 | Rules profiling
150 | ---------------
151 | 
152 | The information provided by Suricata in the engine analysis is really valuable, but it is often better to see the impact on a real run. To do so, there is a profiling system inside Suricata that needs to be activated during the build and can be setup in the configuration.
153 | 
154 | To build it you need to add ``--enable-profiling`` to the ``./configure`` command line. Suricata performance will be impacted and this should not be used in production, but you will have a ``rule_perf.log`` file in your log directory with performance information.
155 | 
156 | .. code-block:: JSON
157 | 
158 |   {
159 |     "timestamp": "2021-02-17T19:41:56.012543+0100",
160 |     "sort": "max ticks",
161 |     "rules": [
162 |       {
163 |         "signature_id": 2,
164 |         "gid": 1,
165 |         "rev": 0,
166 |         "checks": 1628,
167 |         "matches": 4,
168 |         "ticks_total": 2173774,
169 |         "ticks_max": 49498,
170 |         "ticks_avg": 1335,
171 |         "ticks_avg_match": 23204,
172 |         "ticks_avg_nomatch": 1281,
173 |         "percent": 93
174 |       },
175 |       {
176 |         "signature_id": 1,
177 |         "gid": 1,
178 |         "rev": 0,
179 |         "checks": 4,
180 |         "matches": 4,
181 |         "ticks_total": 149520,
182 |         "ticks_max": 41118,
183 |         "ticks_avg": 37380,
184 |         "ticks_avg_match": 37380,
185 |         "ticks_avg_nomatch": 0,
186 |         "percent": 6
187 |       }
188 |     ]
189 |   }
190 | 
191 | Here, we see that signature 2 took 93% of CPU cycles compared to signature 1 at 6%. This was expected as we evaluated the regular expression for all HTTP requests. An interesting observation is that ``ticks_avg_nomatch`` is 0 for the signature with fast pattern. The reason is that when there is no ``Winhttp`` string in the HTTP user agent the MPM algorithm simply skips the evaluation of the rules and hence its cost is null. With the incorrect signature we can see that the cost is 1281 ticks for every match attempt, and we have 4 ``checks`` for signature 1 and 1628 for signature 2. This explains the observed performance ratio.
192 | 
193 | A perfect signature should have zero in ``ticks_avg_nomatch`` and should have a really low ``ticks_avg_match``. The first point being the most important as it means the multi pattern matching on the signature is not triggering when the signature is not matching. This will be the case when the pattern used in MPM is discriminative enough that no other signatures are using it.
194 | 
195 | 
196 | Guideline for performant rules
197 | ==============================
198 | 
199 | 
200 | Trigger multi pattern matching
201 | ------------------------------
202 | 
203 | This is the main recommendation:
204 | 
205 | When writing a rule you need to find a way to trigger MPM in an efficient way. This means the signature must have a content match on a pattern that is on a differentiator. It should be almost unique in the ruleset so it reduces the signature group to the lowest number possible.
206 | 
207 | In our previous example, we used ``http.user_agent; content: "Winhttp";`` because the string ``Winhttp`` is not common among HTTP user agents. This guaranteed us an efficient prefiltering by the MPM engine. As we have seen previously in the profiling output, all the checks done on the signature have been successful. The rest of the filters were just confirmation filters to avoid potential false positives.
208 | 
209 | 
210 | Prefilter everything
211 | -------------------------
212 | 
213 | This is just a reformulation of the previous exigency. Even if the real match is a nasty regular expression, you still need to find the longest string possible with an efficient differentiator capability.
214 | 
215 | .. _dataset-ioc:
216 | 
217 | 
218 | Matching on IOCs
219 | ----------------
220 | 
221 | In a lot of cases, indicators of compromise come as a list of domains, IPs, and user agents to match against the produced data. A commonly seen approach consists of generating a rule for each indicator of compromise (IOC).
222 | 
223 | This will match, but the performance impact will be huge.
224 | 
225 | If you have to match on an IP list, it is better to use the IP reputation system via the `iprep <https://suricata.readthedocs.io/en/latest/rules/ip-reputation-rules.html>`_ keyword that allows a fast match and one single rule for any number of IP addresses.
226 | 
227 | The same can be done for file hash via the keywords `filemd5 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html?highlight=filemd5#filemd5>`_, `filesha1 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html?highlight=filemd5#filesha1>`_, and `filesha256 <https://suricata.readthedocs.io/en/latest/rules/file-keywords.html?highlight=filemd5#filesha256>`_ that match on the list of file hashes. 
228 | 
229 | For example, with a list of sha256 file hashes named ``known-bad-sha256.lst``, one can use the following signatures: ::
230 | 
231 |   alert smb any any -> any any (msg:"known bad file on SMB"; filesha256:"known-bad-sha256.lst"; sid:1; rev:1;)
232 |   alert nfs any any -> any any (msg:"known bad file on NFS"; filesha256:"known-bad-sha256.lst"; sid:2; rev:1;)
233 |   alert http any any -> any any (msg:"known bad file on HTTP"; filesha256:"known-bad-sha256.lst"; sid:3; rev:1;)
234 |   alert ftp-data any any -> any any (msg:"known bad file on FTP"; filesha256:"known-bad-sha256.lst"; sid:4; rev:1;)
235 |   alert smtp any any -> any any (msg:"known bad file on SMTP"; filesha256:"known-bad-sha256.lst"; sid:5; rev:1;)
236 | 
237 | Introduced in Suricata 5.0, `dataset <https://suricata.readthedocs.io/en/latest/rules/datasets.html>`_ fills the gap for the other types of IOCs. It can be used with any sticky buffer. For example, if you have a list of HTTP user agents in ``bad-http-agent.lst``, you can use a signature similar to the following: ::
238 | 
239 |   alert http any any -> any any (msg:"bad user agent"; \
240 |       http.user_agent; dataset:isset,bad-http-agent,type string,load bad-http-agent.lst,memcap 1G,hashsize 1000000; \
241 |       sid:6; rev:1;)
242 | 
243 | Please note: in the case of a dataset with string type, each entry of the set first needs to be encoded to base64 (without the trailing
244 | newline character).
245 | 
246 | 
247 | Real life example
248 | =================
249 | 
250 | When `Sunburst <https://www.fireeye.com/blog/threat-research/2020/12/evasive-attacker-leverages-solarwinds-supply-chain-compromises-with-sunburst-backdoor.html>`_ was made public, a set of signatures was soon created to detect some of the offensive tools used by Fireeye. Among them we had this Snort-like signature: ::
251 | 
252 |   alert tcp any $HTTP_PORTS -> any any (msg:"Backdoor.HTTP.BEACON.[CSBundle MSOffice Server]"; content:"HTTP/1."; depth:7; \
253 |         content:"{\"meta\":{},\"status\":\"OK\",\"saved\":\"1\",\"starttime\":17656184060,\"id\":\"\",\"vims\":{\"dtc\":\""; \
254 |         sid:25887; rev:1;)
255 | 
256 | This signature has some serious problems when run inside Suricata. The engine analysis gives the following result: ::
257 | 
258 |     Rule matches on packets.
259 |     Rule matches on reassembled stream.
260 |     Rule contains 2 content options, 0 http content options, 0 pcre options, and 0 pcre options with http modifiers.
261 |     Fast Pattern "{\x22meta\x22:{},\x22status\x22:\x22OK\x22,\x22saved\x22:\x221\x22,\x22starttime\x22:17656184060,\x22id\x22:\x22\x22,\x22vims\x22:{\x22dtc\x22:\x22" on "payload and reassembled stream" buffer.
262 |     Warning: TCP rule without a flow or flags option.
263 |              -Consider adding flow or flags to improve performance of this rule.
264 |     Warning: Rule has depth/offset with raw content keywords.  Please note the offset/depth will be checked against both packet payloads and stream.  If you meant to have the offset/depth checked against just the payload, you can update the signature as "alert tcp-pkt..."
265 |     Warning: Rule is inspecting both the request and the response.
266 | 
267 | The first warning is about the lack of options because the signature is not checking the direction (to the client in our case) or ensuring that the flow is established. The second warning is more interesting because it warns us that Suricata will inspect the content twice: one time for every TCP packet and one time for each TCP stream. And finally, the third warning mentions that the signature could inspect request and response (in the event that the ``HTTP_PORTS`` variable is broad).
268 | 
269 | But the presence itself of HTTP_PORTS is a problem. If the attacker ever changes the port of the web server to something not covered by the variable we will miss the detection. A typical Suricata signature will fix that by making use of the port independent protocol detection. 
270 | 
271 | This can simply be done by doing: ::
272 | 
273 |   alert http any any -> any any
274 | 
275 | As we are looking at the stream to the client, we can add ``flow:established,to_client`` to the rule.
276 | 
277 | If we run the modified rules through the detection engine, we see: ::
278 | 
279 |     Warning: Rule app layer protocol is http, but content options do not have http_* modifiers.
280 |              -Consider adding http content modifiers.
281 | 
282 | Yes, we are still doing TCP stream matching in a signature on the HTTP protocol instead of matching inside the fields of the HTTP protocol.
283 | 
284 | Let’s look at the first content match: ::
285 | 
286 |   content:"HTTP/1."; depth:7;
287 | 
288 | We are matching on the beginning of the server answer because HTTP_PORTS was on the left in the initial signature. So what we have now is a confirmation that the answer starts with the ``HTTP/1.`` string. A potential solution is to use the keyword ``http.response_line``: ::
289 | 
290 |   http.response_line; content:"HTTP/1."; depth:7;
291 | 
292 | The second match is the following: ::
293 | 
294 |   content:"{\"meta\":{},\"status\":\"OK\",\"saved\":\"1\",\"starttime\":17656184060,\"id\":\"\",\"vims\":{\"dtc\":\"";
295 | 
296 | We don’t have access to the packet, but it looks like a good guess to assume that the data was in the response body from the server. 
297 | 
298 | So now we can do: ::
299 | 
300 |   http.response_body; content:"{\"meta\":{},\"status\":\"OK\",\"saved\":\"1\",\"starttime\":17656184060,\"id\":\"\",\"vims\":{\"dtc\":\"";
301 | 
302 | We end up with the following rule that has no warnings: ::
303 | 
304 |   alert http any any -> any any (msg:"Backdoor.HTTP.BEACON.[CSBundle MSOffice Server]"; \
305 |         http.response_line; content:"HTTP/1."; depth:7; \
306 |         http.response_body; content:"{\"meta\":{},\"status\":\"OK\",\"saved\":\"1\",\"starttime\":17656184060,\"id\":\"\",\"vims\":{\"dtc\":\""; \
307 |         flow:established,to_client; sid:25887; rev:1; ) 
308 | 
309 | The initial signature was published by Proofpoint in the emerging threats ruleset, but it was fully rewritten the next day by the Proofpoint team to instead read: ::
310 | 
311 |   alert http $EXTERNAL_NET any -> $HOME_NET any (msg:"ET CURRENT_EVENTS [Fireeye] Backdoor.HTTP.BEACON.[CSBundle MSOffice Server]"; \
312 |         flow:from_server,established; \
313 |         http.response_line; content:"HTTP/1."; depth:7; \
314 |         file.data; content:"|7b 22|meta|22 3a 7b 7d 2c 22|status|22 3a 22|OK|22 2c 22|saved|22 3a 22|1|22 2c 22|starttime|22 3a|17656184060|2c 22|id|22 3a 22 22 2c 22|vims|22 3a 7b 22|dtc|22 3a 22|"; fast_pattern; \
315 |         reference:url,github.com/fireeye/red_team_tool_countermeasures; \
316 |         classtype:trojan-activity; sid:2031279; rev:3; \
317 |         metadata:affected_product Windows_XP_Vista_7_8_10_Server_32_64_Bit, attack_target Client_Endpoint, created_at 2020_12_08, deployment Perimeter, signature_severity Major, updated_at 2020_12_12;)
318 |  
319 | As expected, we have no warnings when doing the engine analysis: ::
320 | 
321 |     Rule matches on http server body buffer.
322 |     Rule matches on http response line buffer.
323 |     App layer protocol is http.
324 |     Rule contains 0 content options, 2 http content options, 0 pcre options, and 0 pcre options with http modifiers.
325 |     Fast Pattern "{\x22meta\x22:{},\x22status\x22:\x22OK\x22,\x22saved\x22:\x221\x22,\x22starttime\x22:17656184060,\x22id\x22:\x22\x22,\x22vims\x22:{\x22dtc\x22:\x22" on "http response body, smb files or smtp attachments data (file_data)" buffer.
326 |     No warnings for this rule.
327 | 
328 | This signature has some differences to our attempt. It uses `file.data` to match in the `http.response_body` but it is quite the same thing. It also forces the `fast_pattern` on this part of the content which should not be necessary but is always safe to do.
329 | 
330 | The rest is metadata and information. We first have the reference: ::
331 | 
332 |  reference:url,github.com/fireeye/red_team_tool_countermeasures;
333 | 
334 | Then the classification: ::
335 | 
336 |  classtype:trojan-activity;
337 | 
338 | And finally the metadata: ::
339 | 
340 |   metadata:affected_product Windows_XP_Vista_7_8_10_Server_32_64_Bit, attack_target Client_Endpoint,\
341 |         created_at 2020_12_08, deployment Perimeter, signature_severity Major, updated_at 2020_12_12;
342 | 
343 | These pieces of metadata are important because we will find them in the alert event as shown on :numref:`alert-metadata`.
344 | 
345 | .. _alert-metadata:
346 | 
347 | .. figure:: img/alert-metadata.png
348 |    :scale: 70 %
349 | 
350 |    Metadata in the alert event
351 | 
352 | This allows efficient and flexible classifications of the alert events that can be used in queries and the interface. For example, it can be used to present the variety of alerts seen in a system like the one shown on :numref:`metadata-panel`.
353 | 
354 | .. _metadata-panel:
355 | 
356 | .. figure:: img/alert-sig-metadata.png
357 | 
358 |    Panels using signature metadata in Scirius
359 | 
360 | The result is shown in the `Scirius <https://github.com/StamusNetworks/scirius>`_ interface but any data lake that understands JSON will be able to build the same type of visualization.
361 | 
362 | The created and updated dates also provide a nice way to see which recent signatures did fire on the probes, as shown on :numref:`signatures-ordered`.
363 | 
364 | .. _signatures-ordered:
365 | 
366 | .. figure:: img/signatures-ordered.png
367 | 
368 |    Signatures ordered by creation date in Scirius
369 | 
370 | 
371 | Fixing warnings from Suricata Language Server
372 | =============================================
373 | 
374 | The :ref:`Suricata Language Server <suricata-ls>` uses Suricata features to display warnings and hints in IDEs and text editors that support LSP.
375 | Some of the warnings may appear confusing at first, so let's take a tour to understand them and discover how to fix them.
376 | 
377 | 
378 | Directionality warning
379 | ----------------------
380 | 
381 | .. figure:: img/directionality-warning.png
382 | 
383 |    Directionality warning seen in Neovim
384 | 
385 | The signature ::
386 | 
387 |  alert tcp any any -> any any (msg:"toto out"; content:"toto"; sid:1; rev:1;)
388 | 
389 | triggers the following warning: 'Rule inspect server and client side, consider adding a flow keyword'
390 | 
391 | In this signature, the `content` match has no sticky buffer or content modifier attached. As a result, the match is done on the TCP stream data. TCP stream goes two ways, so the inspection will be done for all data going to the server and all data going to the client. In most cases, this is not what we
392 | want to match as we usually know that the pattern should be in a client or server message.
393 | 
394 | So the correct signature would look something like this: ::
395 | 
396 |   alert tcp any any -> any any (msg:"toto out"; content:"toto"; \
397 |             flow:established,to_server; \
398 |             sid:1; rev:1;)
399 | 
400 | By doing this, the inspection will only be done on the packets going to the server. As a result, the inspection work is cut in half as we are just inspecting one way.
401 | 
402 | 
403 | Mixed content
404 | -------------
405 | 
406 | .. figure:: img/mixed-content.png
407 | 
408 |    Mixed content warning seen in Neovim
409 | 
410 | 
411 | The signature ::
412 | 
413 |  alert http any any -> any any (msg:"Doc reader with curl"; \
414 |             content:"/rtfm"; \
415 |             http.user_agent; content:"curl"; \
416 |             sid:2; rev:1;)
417 | 
418 | triggers the following warning: 'Application layer "http2" combined with raw match, consider using a match on application buffer'
419 | 
420 | In the signature the first match `content:"/rtfm"` is done on TCP stream data as there is no sticky buffer or content modifier associated
421 | with it. But the second match, `http.user_agent; content:"curl";`, is done on the HTTP user agent buffer. This setup is not natural as it
422 | is better to work on one of the HTTP fields for all the matches. If we look at the first match, it looks like a URL.
423 | 
424 | So the correct signature would look something like ::
425 | 
426 |  alert http any any -> any any (msg:"Doc reader with curl"; \
427 |             http.uri; content:"/rtfm"; \
428 |             http.user_agent; content:"curl"; \
429 |             sid:2; rev:1;)
430 | 
431 | 
432 | Missing HTTP keywords
433 | ---------------------
434 | 
435 | .. figure:: img/missing-http.png
436 | 
437 |     Missing HTTP keywords warning seen in Neovim
438 | 
439 | The signature ::
440 | 
441 |  alert http any any -> any any (msg:"Doc reader"; content:"GET /rtfm"; sid:3; rev:1;)
442 | 
443 | triggers the following warning: 'pattern looks like it inspects HTTP, use http.request_line or http.method and http.uri instead for improved performance'
444 | 
445 | In this signature, we have a single content match that searches for two words and looks like a part of an HTTP request. Suricata
446 | did detect that and is warning that it would be better to use proper HTTP keywords. This will be better for multiple reasons. First, the HTTP
447 | keywords match on normalized strings and it will improve the resilience of the signature to evasion compared to a simple content match.
448 | Second, it is far more accurate to use matches on HTTP fields. In this particular case, the signature will alert on any HTTP stream
449 | that contains `GET /rtfm`. As a consequence, it will, for example, alert if the signature file is downloaded over HTTP.
450 | 
451 | So the correct signature would look more like this: ::
452 | 
453 |  alert http any any -> any any (msg:"Doc reader"; \
454 |             http.method; content: "GET"; \
455 |             http.uri; content:"/rtfm"; \
456 |             sid:3; rev:1;)
457 | 
458 | We have a match on the HTTP method followed by a match on the URI.
459 | 
460 | .. _rules-perfomance-improvement:
461 | 
462 | 
463 | Performance Improvement process
464 | ===============================
465 | 
466 | There are always multiple ways to write a rule. The variants depend on what you are going to match on and what methods are being used for that
467 | match. For example, the two following rules may match the same way on a sample, but could have varying levels of performance: ::
468 | 
469 |  alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"Test - Rule variant - 1"; \
470 |             flow:established,to_server; \
471 |             http.method; content:"GET"; http.uri; \
472 |             content:"lookforthis"; \
473 |             classtype:command-and-control; sid:1000002; rev:1; \
474 |             metadata:created_at 2022_08_10, updated_at 2022_08_10;)
475 |  
476 |  alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"Test - Rule variant - 2"; \
477 |             flow:established,to_server; urilen:25; \
478 |             http.method; content:"GET"; http.uri; \
479 |             content:"lookforthis"; http.cookie; content:"lookforthat"; \
480 |             classtype:command-and-control; sid:1000003; rev:1; \
481 |             metadata:created_at 2022_08_10, updated_at 2022_08_10;)
482 | 
483 | To validate the performance of a rule and select the best one, it must be run and evaluated over both relevant and non relevant pcaps so the impact
484 | of the rule can be seen on all types of traffic. To do so, you must run the rule through both types of pcaps while having `rule-profiling` enabled.
485 | 
486 | The signature needs to be complete (See steps in :ref:`Signature writing process <write-signature>`) before you can test its performance.
487 | 
488 | 
489 | #. Verify the rule syntax with Suricata Language Server or run Suricata with `--engine-analysis`
490 | #. Use a pcap with relevant traffic
491 | 
492 |    - Run the pcap and the rules with suricata that has rules profiling enabled. A relevant section in the suricata `suricata.yaml` config can be used to adjust sorting or to enable text and JSON outputs
493 |    - Review the results in `rule_perf.log` and make further adjustments as needed. See :ref:`Profile information <profiling-info>` for details
494 | 
495 | #. Use a pcap with non relevant traffic.
496 | 
497 |    - Run with rules profiling
498 |    - Review the results
499 | 
500 | The winning rule is the one with the lowest impact to performance on the relevant traffic and ideally does not appear (aka is not being evaluated at all) in the non-relevant traffic pcap run.
501 | 
502 | 


--------------------------------------------------------------------------------