├── data ├── answers.json ├── aad_logons.pkl ├── alerts_list.pkl ├── host_logons.pkl ├── screenshot.png ├── syslog_data.pkl ├── timeseries.pkl ├── all_events_df.pkl ├── exchange_admin.pkl ├── failedLogons.pkl ├── process_tree.pkl ├── az_net_comms_df.pkl ├── processes_on_host.pkl ├── list_aad_signins_for_account ├── list_activity_for_account.pkl ├── list_aad_signins_for_account.pkl └── data_queries.yaml ├── requirements.txt ├── msticpyconfig.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── SUPPORT.md ├── SECURITY.md ├── README.md ├── .gitignore └── MSTICPy_Lab.ipynb /data/answers.json: -------------------------------------------------------------------------------- 1 | {"question1": "2020-07-10", "question2": "29670"} -------------------------------------------------------------------------------- /data/aad_logons.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/aad_logons.pkl -------------------------------------------------------------------------------- /data/alerts_list.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/alerts_list.pkl -------------------------------------------------------------------------------- /data/host_logons.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/host_logons.pkl -------------------------------------------------------------------------------- /data/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/screenshot.png -------------------------------------------------------------------------------- /data/syslog_data.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/syslog_data.pkl -------------------------------------------------------------------------------- /data/timeseries.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/timeseries.pkl -------------------------------------------------------------------------------- /data/all_events_df.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/all_events_df.pkl -------------------------------------------------------------------------------- /data/exchange_admin.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/exchange_admin.pkl -------------------------------------------------------------------------------- /data/failedLogons.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/failedLogons.pkl -------------------------------------------------------------------------------- /data/process_tree.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/process_tree.pkl -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | msticpy==1.8.2 2 | scikit-learn>=0.20.2 3 | scipy>=1.1.0 4 | statsmodels>=0.11.1 5 | -------------------------------------------------------------------------------- /data/az_net_comms_df.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/az_net_comms_df.pkl -------------------------------------------------------------------------------- /data/processes_on_host.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/processes_on_host.pkl -------------------------------------------------------------------------------- /data/list_aad_signins_for_account: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/list_aad_signins_for_account -------------------------------------------------------------------------------- /data/list_activity_for_account.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/list_activity_for_account.pkl -------------------------------------------------------------------------------- /data/list_aad_signins_for_account.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/msticpy-lab/HEAD/data/list_aad_signins_for_account.pkl -------------------------------------------------------------------------------- /msticpyconfig.yaml: -------------------------------------------------------------------------------- 1 | QueryDefinitions: null 2 | TIProviders: 3 | GreyNoise: 4 | Args: 5 | AuthKey: "" 6 | Primary: true 7 | Provider: GreyNoise 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). 
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MSTICPy Lab 2 | 3 | This repo contains the code and data for an interactive lab in which to learn about [MSTICPy](https://github.com/microsoft/msticpy).
4 | If you don't know anything about MSTICPy, it is recommended that you start by reading [our documentation over at ReadTheDocs](https://msticpy.readthedocs.io/en/latest/).
5 |  6 | Once you understand what MSTICPy is you are ready to jump into the lab, the main component of which is a Jupyter Notebook.  7 |  8 | ## What we will cover in the lab 9 | In this lab you will learn about and use the main features of MSTICPy. The lab is split into several sections, each one focused on a key element of MSTICPy: 10 | - Data Acquisition 11 | - Data Enrichment 12 | - Extracting Key Data 13 | - Data Visualization 14 | - Pivots in MSTICPy 15 | - ML in MSTICPy 16 |  17 | ![Screenshot of the lab](./data/screenshot.png) 18 |  19 | In each section you will have a set of guided examples that show you how the features work and how to call them. After the examples there are short lab exercises for you to complete; these involve using the features you have just seen examples of. 20 | Don't worry if you can't complete any of the lab exercises; they are not required to move on to the next section. You can also view a completed version of the notebook [here](https://github.com/microsoft/msticpy-lab/blob/main/MSTICPy_Lab_Completed.ipynb). 21 |  22 | ## How to use the lab 23 | If you wish, you can download this repo and run the notebook locally. However, a much easier experience is to click the 'Launch Binder' button below to launch the lab using [Binder](https://mybinder.org/); this will load the notebook in a pre-configured environment that you can access straight from the browser without any setup required. The notebook contains all instructions and resources required for the lab. 24 |  25 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/microsoft/msticpy-lab/HEAD?filepath=MSTICPy_Lab.ipynb) 26 |  27 | If you have any questions or feedback, please open an issue or contact msticpy@microsoft.com. 28 |  29 | ## Contributing 30 |  31 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 32 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 33 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 34 |  35 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 36 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 37 | provided by the bot. You will only need to do this once across all repos using our CLA. 38 |  39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 40 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 41 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 42 |  43 | ## Trademarks 44 |  45 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 46 | trademarks or logos is subject to and must follow 47 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 48 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 49 | Any use of third-party trademarks or logos is subject to those third parties' policies.
50 | -------------------------------------------------------------------------------- /data/data_queries.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: 1 3 | description: Local Data Alert Queries 4 | data_environments: [LocalData] 5 | data_families: [SecurityAlert, WindowsSecurity, Network, Azure, LinuxSyslog] 6 | tags: ['alert', 'securityalert', 'process', 'account', 'network', 'logon'] 7 | defaults: 8 | metadata: 9 | data_source: 'security_alert' 10 | parameters: 11 | sources: 12 | get_network_summary: 13 | description: Network summary for Timeseries 14 | metadata: 15 | data_families: [Network] 16 | args: 17 | query: timeseries.pkl 18 | parameters: 19 | list_host_logons: 20 | description: List logons on host 21 | metadata: 22 | data_families: [WindowsSecurity] 23 | args: 24 | query: host_logons.pkl 25 | parameters: 26 | list_host_processes: 27 | description: List processes on host 28 | metadata: 29 | data_families: [WindowsSecurity] 30 | args: 31 | query: processes_on_host.pkl 32 | parameters: 33 | OfficeActivity: 34 | description: Office 365 activity 35 | metadata: 36 | data_families: [Azure] 37 | args: 38 | query: exchange_admin.pkl 39 | parameters: 40 | list_alerts: 41 | description: Retrieves list of alerts 42 | metadata: 43 | data_families: [SecurityAlert] 44 | args: 45 | query: alerts_list.pkl 46 | parameters: 47 | process_hierarchy: 48 | description: Show hierarchy of processes on host 49 | metadata: 50 | data_families: [WindowsSecurity] 51 | args: 52 | query: process_tree.pkl 53 | parameters: 54 | list_host_logon_failures: 55 | description: List logon failures on host 56 | metadata: 57 | data_families: [WindowsSecurity] 58 | args: 59 | query: failed_logons.pkl 60 | parameters: 61 | list_host_events: 62 | description: List events failures on host 63 | metadata: 64 | data_families: [WindowsSecurity] 65 | args: 66 | query: all_events_df.pkl 67 | parameters: 68 | get_process_tree: 69 | description: Get process tree for a process 70 | metadata: 71 | data_families: [WindowsSecurity] 72 | args: 73 | query: process_tree.pkl 74 | parameters: 75 | list_azure_network_flows_by_ip: 76 | description: List Azure Network flows by IP address 77 | metadata: 78 | data_families: [Network] 79 | args: 80 | query: az_net_comms_df.pkl 81 | parameters: 82 | list_azure_network_flows_by_host: 83 | description: List Azure Network flows by host name 84 | metadata: 85 | data_families: [Network] 86 | args: 87 | query: az_net_comms_df.pkl 88 | parameters: 89 | list_all_signins_geo: 90 | description: List all Azure AD logon events 91 | metadata: 92 | data_families: [Azure] 93 | args: 94 | query: aad_logons.pkl 95 | parameters: 96 | list_all_syslog_events: 97 | description: List all Syslog events 98 | metadata: 99 | data_families: [LinuxSyslog] 100 | args: 101 | query: syslog_data.pkl 102 | parameters: 103 | list_aad_signins_for_account: 104 | description: List all Azure AD logons for a user 105 | metadata: 106 | data_families: [Azure] 107 | args: 108 | query: list_aad_signins_for_account.pkl 109 | list_activity_for_account: 110 | description: List all Office365 activity for a user 111 | metadata: 112 | data_families: [Office365] 113 | args: 114 | query: list_activity_for_account.pkl 115 | list_logon_attempts_by_account: 116 | description: Get all logons for an account 117 | metadata: 118 | data_families: [WindowsSecurity] 119 | args: 120 | query: host_logons.pkl 121 | list_logons_for_account: 122 | description: List all Syslog events 123 | 
metadata: 124 | data_families: [LinuxSyslog] 125 | args: 126 | query: syslog_data.pkl 127 | parameters: -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | 
*.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /MSTICPy_Lab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MSTICPy Lab\n", 8 | "\n", 9 | "This lab provides an interactive introduction to MSTICPy and its core features. It uses local datasets to provide a repeatable experience, however this follows the same pattern as if you were using data from a remote data store via one of MSTICPy's [Data Providers](https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProviders.html)
\n", 10 | "\n", 11 | "\n", 12 | "If you require more information during this lab more details can be found in the [MSTICPy documentation](https://msticpy.readthedocs.io/en/latest/)." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## How to use this lab\n", 20 | "\n", 21 | "The lab contains a number of interactive code examples as well as exercises to be completed. The exercises are entirely optional and can be skipped if wanted, however they provide a useful way to learn about using some of the core features of MSTICPy.
\n", 22 | "\n", 23 | "To use the notebook simply select each cell below and either click the run cell button at the top or alternatively use the keyboard shortcut of Ctrl+Enter to execute each cell. Many cells will use the output of previous cells so its strongly recommended that cells be run in order.
\n", 24 | "\n", 25 | "
\n", 26 | " Note: not all cells will have an output, do not be surprised if you do not see anything appear under a cell after running it. Also, some cells may take a while to run so please be a bit patient. For more help on running Jupyter notebooks please refer to this documentation.\n", 27 | "
\n", 28 | "\n", 29 | "If you get stuck with any of the exercises in this lab you can check your answers in the [completed notebook here](https://github.com/microsoft/msticpy-lab/blob/main/MSTICPy_Lab_Completed.ipynb).
\n", 30 | "\n", 31 | "In several places this notebook uses lookups to the external threat intelligence provider [GreyNoise](https://GreyNoise.io/). As this is an online service you may get different responses as that data is updated. Do not be surprised if you get no positive results when running the sections of this notebook where threat intelligence is used." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Setup\n", 39 | "\n", 40 | "MSTICpy includes a feature called [nbinit](https://msticpy.readthedocs.io/en/latest/msticpy.nbtools.html?highlight=nbinit#module-msticpy.nbtools.nbinit) that handles the process of installing and importing modules into a notebook environment. This was developed to allow for a clearer starting cell in notebooks and to avoid users being presented with a very large cell block at the top of a notebook.
\n", 41 | "\n", 42 | "By passing the notebook namespace to init_notebook() this function handles the job of installing and importing core MSTICpy packages along with any others that might be needed by a notebook. When running this cell you may see some warnings - **this is to be expected and will not affect the rest of the lab** - they are simply show as we are not using a completed configuration in this scenario.\n", 43 | "\n", 44 | "
\n", 45 | "Note: When running this cell you may see some warnings - this is to be expected and will not affect the rest of the lab - they are simply show as we are not using a completed configuration in this scenario.\n", 46 | "
\n", 47 | "\n", 48 | "You must have msticpy installed to run this notebook (if using binder this lab has the package pre-installed for you):\n", 49 | "```\n", 50 | "!pip install --upgrade msticpy[timeseries, splunk, azsentinel]\n", 51 | "```\n", 52 | "MSTICpy versions > 1.0.1\n", 53 | "\n", 54 | "The notebook also uses MSTIC Notebooklets (again pre-installed if using binder):\n", 55 | "```\n", 56 | "!pip install --upgrade msticnb\n", 57 | "```" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "from msticpy.nbtools import nbinit\n", 67 | "\n", 68 | "nbinit.init_notebook(namespace=globals())\n", 69 | "\n", 70 | "ti = TILookup()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# We also need to load a couple of anwsers for one of the exercises (no peeking!)\n", 80 | "import json\n", 81 | "\n", 82 | "with open(\"data/answers.json\") as f:\n", 83 | " answers = json.load(f)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## Data Acquisition\n", 91 | "The starting point for many security analysis notebooks is ingesting data to conduct analysis or investigation of. MSTICpy has a number of [query providers](https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProviders.html) to allow users to query and return data from a number of sources. Below we are using the Local Data query provider to return data from local files. This is useful for this lab but is also useful if analysis is relying on local data rather than a 'live' data source.
\n", 92 | "\n", 93 | "In order to provide a common interface layer between data and features in MSTICPy all data is returned in a Pandas [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) DataFrame. As well as providing a consistent framework for other features to user it also allows for easy manipulation and analysis of the returned data using Pandas numerous features." 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "The first step in using a query provider is to initialize a `QueryProvider` and pass it the type of provider you want to use. Depending on the provider type you can also pass other required parameters. In the cell below we create a LocalData provider and pass it the location of where are local data files and their definitions are stored.
\n", 101 | "\n", 102 | "Each provider contains a series of built-in queries. These are pre-defined queries that return a specific subset of data, for our LocalData provider this is a specific file, however for a 'live' data source such as Microsoft Sentinel these will execute queries against that data source and return the results.
\n", 103 | "\n", 104 | "Once the query provider has been created we can use the `browse_queries` feature to interactively view the available queries." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# We start by loading a query provider for our `LocalData` source.\n", 114 | "qry_prov = QueryProvider(\n", 115 | " data_environment=\"LocalData\", data_paths=[\"./data\"], query_paths=[\"./data\"]\n", 116 | ")\n", 117 | "# We can then look at the queries built into a provider by default\n", 118 | "qry_prov.browse_queries()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "------------------------\n", 126 | "\n", 127 | "Once a query has been selected you call it directly with `qry_prov.{query_group}.{query_name}` . You can also pass extra parameters to these queries where they have configurable elements (often things such as timeframes and specific entities to search for). In addition the query providers also allow you to execute a query defined as a string by calling `qry_prov.exec_query(QUERY_STRING)`
\n", 128 | "\n", 129 | "The returned dataframe contains the query results and can be displayed and interacted with as with any other Pandas dataframe." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "events = qry_prov.WindowsSecurity.list_host_events()\n", 139 | "events.head()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "
\n", 147 | "

Lab Exercise 1

\n", 148 | "In the cell below write code that uses the query provider created above (`qry_prov`) to get data relating to security alerts using a built-in query. You can use the query browser above to find the most suitable query to run.\n", 149 | "\n", 150 | "
\n", 151 | " Hint:\n", 152 | " Queries relating to security alerts are part of the SecurityAlert query type.\n", 153 | "
\n", 154 | "
" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# Get security alert data" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## Enrich Data\n", 171 | "A key analysis step for security analysts is to take a dataset, extract relevant elements and enrich it with another dataset to help filter it.\n", 172 | "A common example of this is taking IP addresses in log data and seeing if any of them appear in threat intelligence data.
\n", 173 | "\n", 174 | "In the cells below we use MSTICPy's query provider to get sign in event data, and then look up the IPs those sign-ins have come from against a threat intelligence provider's API using the [MSTICPy threat intelligence](https://msticpy.readthedocs.io/en/latest/data_acquisition/TIProviders.html) features. In this case we are using the [GreyNoise](https://greynoise.io/) provider." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# First we are going to use a built in query to get all of our signin data from our Windows host\n", 184 | "data = qry_prov.WindowsSecurity.list_host_logons()\n", 185 | "data.head()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "MSTICPy includes a [Threat Intelligence (TI) lookup provider](https://msticpy.readthedocs.io/en/latest/data_acquisition/TIProviders.html) `TILookup` that allows for key indicators to be searched for in various different services. The provider can be configured to use a range of different providers, and queries can be specifically targeted at a provider if required.\n", 193 | "MSTICPY current supports the following providers:\n", 194 | "- VirusTotal\n", 195 | "- AlienVault OTX\n", 196 | "- IBM XForce\n", 197 | "- GreyNoise\n", 198 | "- Microsoft Sentinel Threat Intelligence\n", 199 | "\n", 200 | "There is also support via the TI lookup provider to get the [Open Page Rank](https://www.domcop.com/openpagerank/what-is-openpagerank#:~:text=What%20Is%20Open%20PageRank%3F%20The%20Open%20PageRank%20initiative,has%20been%20collected%20over%20the%20last%207%20years) for a domain, and determine if an IP address is a [ToR]( https://www.torproject.org/) exit node.
\n", 201 | "\n", 202 | "When instantiating a TI provider you can define the providers you want it to load, or you can let it search for a [MSTICPy config file](https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html) and take configuration from there - this is the approach we are taking in this lab." 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "# Next we need to load our TI providers\n", 212 | "ti = TILookup()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "`.loaded_providers` shows which providers have been loaded by the TI lookup provider." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "# For this lab we are just using the GreyNoise provider\n", 229 | "ti.loaded_providers" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "Once loaded you can use `lookup_ioc` to look up a single indicator, or `lookup_iocs` to look up every value in a dataframe column.
\n", 237 | "\n", 238 | "In this example we want to look up every IP address in our results dataframe, so we are going to use `lookup_iocs`, tell it to look up values in the \"IpAddress\" column, and use the GreyNoise service to do the lookups.
\n", 239 | "\n", 240 | "Once we have results you can either display the results statically or use `browse_results` to get an interactive view of the results." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "from msticpy.nbtools import ti_browser\n", 250 | "\n", 251 | "# Here we lookup each of the IP addresses in our dataset\n", 252 | "results = ti.lookup_iocs(data, obs_col=\"IpAddress\", providers=[\"GreyNoise\"])\n", 253 | "ti_browser.browse_results(results, severities=[\"information\", \"warning\", \"high\"])" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "
\n", 261 | "

Lab Exercise 2

\n", 262 | "Now that you have seen how to return and enrich data, complete the code in the following two cells to get a list of Azure AD sign in events and look up the origin IP addresses against threat intelligence.
\n", 263 | "\n", 264 | "Additional documentation on the threat intelligence provider can be found [here](https://msticpy.readthedocs.io/en/latest/data_acquisition/TIProviders.html)\n", 265 | "\n", 266 | "
\n", 267 | " Hint:\n", 268 | " \n", 272 | "
\n", 273 | "
\n", 274 | " \n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# Use the query provider qry_prov to get Azure signin data with the list_all_signins_geo query\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "# Lookup the IP addresses in the IPAddress column using the GreyNoise TI provider\n" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "## Extracting key data\n", 300 | "Often when working with security related data the indicators you need are not as readily available as they were in the example above. Often, they can be encoded or otherwise obscured from human analysis. MSTICPy also includes tooling to help security analysts quickly decode this data for further analysis.
\n", 301 | "\n", 302 | "In this section we are going to query our datasets for command line data, decode any Base64 encoding in the command lines using the [`base64` feature](https://msticpy.readthedocs.io/en/latest/data_analysis/Base64Unpack.html), and then extract known indicator types (such as IP addresses and domain names) from that data using the [`IoCExtract` feature](https://msticpy.readthedocs.io/en/latest/data_analysis/IoCExtract.html)." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# Load command line data set\n", 312 | "cmdl_data = qry_prov.WindowsSecurity.list_host_processes()\n", 313 | "cmdl_data.head()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "Now that we have some data in a DataFrame, we can use MSTICP's `mp_b64` Pandas access to call `extract` and tell it to unpack data found in the 'CommandLine' column. This feature will look for Base64 patterns in that column, attempt to unpack any it finds and present us with the decoded output.
\n", 321 | "\n", 322 | "`mp_b64.extract` provides an output of just elements relevant to the decoded string, to get some context on where this string was found we next join it back to the original dataset so that we can see the log event and the decoded string in the same dataset." 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "# Base64 decode\n", 332 | "b64df = cmdl_data.head(1000).mp_b64.extract(column=\"CommandLine\")\n", 333 | "b64df[\"SourceIndex\"] = pd.to_numeric(b64df[\"src_index\"])\n", 334 | "merged_df = (\n", 335 | " cmdl_data.merge(right=b64df, how=\"left\", left_index=True, right_on=\"SourceIndex\")\n", 336 | " # .drop(columns=['Unnamed: 0'])\n", 337 | " .set_index(\"SourceIndex\")\n", 338 | ")\n", 339 | "\n", 340 | "# Show the result of the merge (only those rows that have a value in original_string)\n", 341 | "merged_filtered = merged_df.dropna(subset=[\"decoded_string\"])[\n", 342 | " [\n", 343 | " \"TimeGenerated\",\n", 344 | " \"Account\",\n", 345 | " \"Computer\",\n", 346 | " \"NewProcessName\",\n", 347 | " \"CommandLine_x\",\n", 348 | " \"decoded_string\",\n", 349 | " ]\n", 350 | "]\n", 351 | "merged_filtered" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "Now that we have the decoded string, we can look for any Indicators of Compromise (IoCs) in these strings. Using [MSTICPy's `IoCExtract`](https://msticpy.readthedocs.io/en/latest/data_analysis/IoCExtract.html) we can search all of these decoded strings for things such as IP addresses, file hashes and URLs. You can choose to search of specific indicator types by passing the `ioc_types` parameter but we want to just search for everything.
\n", 359 | "\n", 360 | "As with the Base64 decoding we can use a Pandas accessor `mp_ioc.extract` to call the IOC Extractor. We simply call it on the DataFrame and specify which columns to apply it to.\n", 361 | "\n", 362 | "MSTICPy has a set of common IoC patterns to search for and extract but you can also extend this by adding your own regex patterns with `add_ioc_type`." 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# Extract IoCs\n", 372 | "ioc_df = merged_filtered.mp_ioc.extract(columns=[\"decoded_string\"])\n", 373 | "display(ioc_df)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "We can also use `domain_utils` to get some other information on the domain, such as what IP addresses it resolves to." 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "from msticpy.sectools.domain_utils import dns_resolve\n", 390 | "\n", 391 | "dns_info = dns_resolve(ioc_df.iloc[0][\"Observable\"])\n", 392 | "display(dns_info)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "
\n", 400 | "

Lab Exercise 4

\n", 401 | "Syslog data is a common data source during security analysis. The syslog data structure includes a lot of key information in a single field that can make extraction complicated.
\n", 402 | "In this exercise you will load syslog data and extract indicators from the Message field.
\n", 403 | "\n", 404 | "**Bonus Task**:
\n", 405 | "Identify the Base64 encoded syslog messages and extract indicators from those as well.\n", 406 | "\n", 407 | "
\n", 408 | " Hint:\n", 409 | " \n", 413 | "
\n", 414 | "
" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "# Load syslog data\n" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "# Extract URL indicators from the SyslogMessage column and get a unique list of indicators found\n" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "# Decode Base64 data and extract indicators\n", 442 | "\n", 443 | "# get a list of decoded strings\n", 444 | "\n", 445 | "# Extract dns indicators from these strings (use the full_decoded_string column)\n" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## Data Visualization\n", 453 | "Data visualization is a key tool in any data analyis scenario and the same is true during security analysis. MSTICPy contains a number of visualizations, below we will plot locations on a map to help identify anomalous logon locations, showing a graph of security alerts, and plotting a process tree showing process executions on a host.
\n", 454 | "\n", 455 | "*MSTICpy uses [Bokeh](https://bokeh.org/) and [Folium](https://python-visualization.github.io/folium/#) to power its visualization features.*\n", 456 | "\n", 457 | "The first thing we need to do is get some data to plot. Here we will use Azure AD signin events. These events include the location the login occured from allowing us to easily plot them on a map for geospatial analysis." 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "# Plot IP geolocation on a map\n", 467 | "loc_data = qry_prov.Azure.list_all_signins_geo()\n", 468 | "loc_data.head()" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "Before we can plot the data we need to format the raw data into a known format. MSTICPy has a number of [defined entities](https://msticpy.readthedocs.io/en/latest/msticpy.datamodel.html?highlight=entitie), one of which is `Ip`. The entity has a location property, so by mapping the columns in our data to the properties of these entities we can easily format the whole dataset in a series of entities.\n", 476 | "\n", 477 | "From there we can then use MSTICPy's [`FoliumMap`](https://msticpy.readthedocs.io/en/latest/visualization/FoliumMap.html) feature to plot these entities on a map." 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "ip_ents = []\n", 487 | "\n", 488 | "\n", 489 | "def format_ips(row):\n", 490 | " ip_ent = entities.ip_address.Ip(Address=row[\"IPAddress\"])\n", 491 | " loc = entities.GeoLocation(\n", 492 | " Longitude=float(row[\"Longitude\"]), Latitude=float(row[\"Latitude\"])\n", 493 | " )\n", 494 | " ip_ent.Location = loc\n", 495 | " ip_ents.append(ip_ent)\n", 496 | "\n", 497 | "\n", 498 | "# Format dataset into entities\n", 499 | "loc_data.apply(format_ips, axis=1)\n", 500 | "# Create Map plot\n", 501 | "folium_map = FoliumMap(zoom_start=2)\n", 502 | "# Add IP entities to the map\n", 503 | "folium_map.add_ip_cluster(ip_entities=ip_ents, color=\"blue\")\n", 504 | "# Center the map around the plotted entities\n", 505 | "folium_map.center_map()\n", 506 | "# Display the map\n", 507 | "folium_map" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "Another useful visualization is a graph plot that shows connections between events. This is particularily useful when looking at data item such as alerts that contain a lot of embedded data such as affected hosts and users. By graph plotting alert data we can see the connections between them that might help a security analyst get a better understanding of an intrusion.\n", 515 | "\n", 516 | "Using [`EntityGraph`](https://msticpy.readthedocs.io/en/latest/visualization/EntityGraph.html?highlight=entity%20graph) we can create a [NetworkX](https://networkx.org/) representation of our alerts and their entities. We then call `.plot` to display the graph using [Bokeh](https://bokeh.org/)." 
517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "from msticpy.vis.entity_graph_tools import EntityGraph\n", 526 | "\n", 527 | "# Entity Graph\n", 528 | "alert_df = qry_prov.SecurityAlert.list_alerts()\n", 529 | "grph = EntityGraph(alert_df)\n", 530 | "grph.plot()" 531 | ] 532 | }, 533 | { 534 | "cell_type": "markdown", 535 | "metadata": {}, 536 | "source": [ 537 | "Another common visualization in security tooling is the process tree. This shows the hierarchical relationship of processes executed on a host.\n", 538 | "\n", 539 | "MSTICPy has functions to both build and plot these process trees based off Windows process creation events. More details on these functions can be found [here](https://msticpy.readthedocs.io/en/latest/visualization/ProcessTree.html)." 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "# Before plotting a process tree we need to get data related to process creation events\n", 549 | "proc_df = qry_prov.WindowsSecurity.get_process_tree()\n", 550 | "proc_df.head()" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "The process tree visualization can be called on a DataFrame using a Pandas accessor `mp_plot.process_tree()`" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "proc_df.mp_plot.process_tree()" 567 | ] 568 | }, 569 | { 570 | "cell_type": "markdown", 571 | "metadata": {}, 572 | "source": [ 573 | "Temporal analaysis is another key tool in security investigation. Seeing in which order events occur, and how events cluster temporally can provide some invaluable insights. To help with this MSTICPy contains a flexible [timeline feature](https://msticpy.readthedocs.io/en/latest/visualization/EventTimeline.html) that allows for the plotting of a range of data on a timeline. You can plot simple single catagory descrite events, running values, and multi series events all in an interactive [Bokeh](https://bokeh.org/) visualization.\n", 574 | "\n", 575 | "Using the timeline is a simple as calling `mp_plot.timeline()` on a DataFrame. By default this will use the TimeGenerated column for the time element, and a set of common column values to display when hovering over an event. These can be customized with the `time_column` and `source_columns` parameters (as used below).\n" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [ 584 | "# Get some data to plot\n", 585 | "alert_df = qry_prov.SecurityAlert.list_alerts()\n", 586 | "# Plot these values on a timeline based on when they were generated\n", 587 | "alert_df.mp_plot.timeline(source_columns=[\"AlertName\"])" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": {}, 593 | "source": [ 594 | "It's also possible to group events by a column to show them as separate rows in the timeline. This is done by passing the column you want to split on as `group_by` - below we are grouping by the alert severity.\n", 595 | "\n", 596 | "There are also many other ways to customize this timeline. Please read the [full documentation](https://msticpy.readthedocs.io/en/latest/msticpy.nbtools.html#msticpy.nbtools.timeline.display_timeline) to see a list of options." 
597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "alert_df.mp_plot.timeline(source_columns=[\"AlertName\"], group_by=\"Severity\")" 606 | ] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "metadata": {}, 611 | "source": [ 612 | "
\n", 613 | "

Lab Exercise 5

\n", 614 | " In this lab you are going to plot your own timeline of events.
\n", 615 | " The timeline show plot Windows Host Logon events (.WindowsSecurity.list_host_logons).
\n", 616 | " You should group these by the logon type, and the hover over should show the user account logging in and what IP address they logged in from.\n", 617 | " \n", 618 | "
\n", 619 | " Hint:\n", 620 | " \n", 624 | "
\n", 625 | "
" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "# Load data\n", 635 | "\n", 636 | "\n", 637 | "# Plot the timeline\n", 638 | "\n" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": {}, 644 | "source": [ 645 | "## Pivots in MSTICPy\n", 646 | "\n", 647 | "MSTICPy has a lot of functionality distributed across many classes and modules. However, there is no simple way to discover where these functions are and what types of data the function is relevant to.\n", 648 | "\n", 649 | "[Pivot functions](https://msticpy.readthedocs.io/en/latest/data_analysis/PivotFunctions.html) bring this functionality together grouped around Entities. Entities are representations of real-world objects found commonly in CyberSec investigations. Some examples are: IpAddress, Host, Account, URL.\n", 650 | "\n", 651 | "In the following cells we look at how pivot functions can be used to easily access MSTICPy functionality relevent to the indicator being investigated.\n", 652 | "\n", 653 | "
\n", 654 | "Note: When you initialize the Pivot provider you will get a configuration error warning. This is not a problem (we are not using these features in this lab), we have included it to give you an example of the customized warnings in MSTICPy. These are designed to help users when running MSTICPy in notebooks, you can see that the error provides instructions and guidance to resolve issues (you don't have to resolve this one though!).\n", 655 | "
\n", 656 | "\n", 657 | "We first start by initalizing our Pivots:" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "from msticpy.datamodel.pivot import Pivot\n", 667 | "Pivot(namespace=globals())" 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": {}, 673 | "source": [ 674 | "Once loaded we can take a look at the available Pivots.\n", 675 | "\n", 676 | "
\n", 677 | "Note: The available Pivots are based on the providers we have loaded so if you have additional providers loaded you will have more Pivots available to you.
\n", 678 | "\n" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": null, 684 | "metadata": {}, 685 | "outputs": [], 686 | "source": [ 687 | "# Once loaded we can browse what pivots are available in an interactive widget\n", 688 | "Pivot.browse()" 689 | ] 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": {}, 694 | "source": [ 695 | "To begin to Pivot we first need to get an entity to Pivot on. For this lab we are going to use an IP address entity, and once extracted our first Pivot will be to see what sort of IP address it is using as the `ip_type` Pivot.\n", 696 | "\n", 697 | "In this example we are running the Pivot on a single indicator, however many Pivots also let you apply them to an entire dataframe (you will see this in the next Pivot example)." 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": null, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "# Get an IP Address to pivot on\n", 707 | "ip_df = qry_prov.Network.list_azure_network_flows_by_ip()\n", 708 | "ip = ip_df.iloc[0][\"VMIPAddress\"]\n", 709 | "# See what type of IP address we are working with.\n", 710 | "entities.IpAddress.util.ip_type(ip_str=ip)" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": {}, 716 | "source": [ 717 | "As well as individual Pivots you can chain them together to perform several actions on a dataset.
\n", 718 | "More information on [Pivots can be found in the MSTICPy documentation](https://msticpy.readthedocs.io/en/latest/data_analysis/PivotFunctions.html)\n", 719 | "\n", 720 | "In the cell below we are taking a dataframe containing command line data addresses and applying a chain of Pivots to them.
\n", 721 | "The chain in use does the following:
\n", 722 | "" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "# Get some command line data\n", 738 | "cmdl_data = qry_prov.LinuxSyslog.list_all_syslog_events()\n", 739 | "# Extract IoCs from command lines\n", 740 | "(\n", 741 | " cmdl_data.head(500).mp_pivot.run(\n", 742 | " entities.Process.util.extract_iocs, column=\"SyslogMessage\", join=\"left\"\n", 743 | " )\n", 744 | " # Filter where there were IoCs found\n", 745 | " .dropna(subset=[\"Observable\"])\n", 746 | " # Filter to only IP IoCs\n", 747 | " .query(\"IoCType == 'ipv4'\")\n", 748 | " # Lookup IoCs in threat intel\n", 749 | " .mp_pivot.run(\n", 750 | " entities.IpAddress.ti.lookup_ipv4_GreyNoise, column=\"Observable\", join=\"left\"\n", 751 | " )\n", 752 | " # Filter to where the IPs were found in threat intel\n", 753 | " .query(\"Status != 404\")\n", 754 | ")" 755 | ] 756 | }, 757 | { 758 | "cell_type": "markdown", 759 | "metadata": {}, 760 | "source": [ 761 | "
\n", 762 | "\n", 763 | "

Lab Exercise 6

\n", 764 | "Create your own pivot pipeline that does the following:
\n", 765 | "\n", 773 | "\n", 774 | "
\n", 775 | " Hint:\n", 776 | " \n", 781 | "
\n", 782 | "
" 783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": null, 788 | "metadata": {}, 789 | "outputs": [], 790 | "source": [ 791 | "net_df = qry_prov.Network.list_azure_network_flows_by_ip()\n", 792 | "net_df[\"PublicIPs\"] = (\n", 793 | " net_df[\"PublicIPs\"]\n", 794 | " .str.strip(\"['']\")\n", 795 | " .str.replace(\"'\", \"\")\n", 796 | " .str.replace(\" \", \"\")\n", 797 | " .str.split(\",\")\n", 798 | ")\n", 799 | "net_df = net_df.assign(IPs=net_df[\"PublicIPs\"].explode(\"IPs\"))\n", 800 | "net_df.dropna(subset=[\"IPs\"], inplace=True)\n", 801 | "# Create the pivot chain" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "## MSTICPy's ML Features\n", 809 | "\n", 810 | "MSTICPy has a number of basic ML features to support simple analysis that is common in security investigaiton. In the following section we will look at two of those; timeseries analysis and clustering.\n", 811 | "\n", 812 | "In order to effectively hunt in a dataset analysts need to focus on specific events of interest. Below we use MSTICpy's [time series analysis](https://msticpy.readthedocs.io/en/latest/msticpy.analysis.html?highlight=timeseries#module-msticpy.analysis.timeseries) machine learning capabilities to identify anomalies in our network traffic for further investigation.
\n", 813 | "As well as computing anomalies we visualize the data so that we can more easily see where these anomalies present themselves.\n" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": null, 819 | "metadata": {}, 820 | "outputs": [], 821 | "source": [ 822 | "# Import MSTICPy's timeseries specfic features\n", 823 | "from msticpy.analysis.timeseries import timeseries_anomalies_stl\n", 824 | "from msticpy.nbtools.timeseries import display_timeseries_anomolies\n", 825 | "\n", 826 | "# Load some network data to apply our analysis to\n", 827 | "stldemo = qry_prov.Network.get_network_summary()\n", 828 | "\n", 829 | "# Conduct our timeseries analyis\n", 830 | "output = timeseries_anomalies_stl(stldemo)\n", 831 | "\n", 832 | "# Visualize the timeseries and any anomalies\n", 833 | "display_timeseries_anomolies(data=output, y=\"TotalBytesSent\")" 834 | ] 835 | }, 836 | { 837 | "cell_type": "markdown", 838 | "metadata": {}, 839 | "source": [ 840 | "
\n", 841 | "\n", 842 | "

Lab Exercise 7

\n", 843 | "Using the timeline above answer the following questions:\n", 844 | "\n", 845 | "
\n", 846 | " Hint:\n", 847 | " \n", 850 | "
\n", 851 | "
" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": null, 857 | "metadata": {}, 858 | "outputs": [], 859 | "source": [ 860 | "import ipywidgets as widgets\n", 861 | "\n", 862 | "md(\"On what date did the two network data anomalies occur?\", \"bold\")\n", 863 | "date = widgets.DatePicker(description=\"Pick a Date\", disabled=False)\n", 864 | "display(date)\n", 865 | "\n", 866 | "md(\"How many bytes were sent on at 2020-07-06 19:00?\", \"bold\")\n", 867 | "bytesa = widgets.Text(description=\"Answer:\", disabled=False)\n", 868 | "display(bytesa)" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": null, 874 | "metadata": {}, 875 | "outputs": [], 876 | "source": [ 877 | "if str(date.value) == answers[\"question1\"] and bytesa.value == answers[\"question2\"]:\n", 878 | " md(\n", 879 | " f\"Correct, the anomolies occured on {date.value} and {bytesa.value} bytes were transfered at 2020-07-06 19:00\"\n", 880 | " )\n", 881 | "else:\n", 882 | " md(\"One of your answers is incorrect please try again\")" 883 | ] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": {}, 888 | "source": [ 889 | "### Logon Sessions\n", 890 | "Logon events are key to understanding any host based activity. We have previously used MSTICpy's [timeline feature](https://msticpy.readthedocs.io/en/latest/visualization/EventTimeline.html) to display value based data from our timeseries analayis. However, we can also use it to display multiple types of discrete data on the same timeline. This is particularly useful for Windows logon events where we plot different logon types (interactive, network, etc.) in different horizontal series.
\n", 891 | "We can split the plot by simply providing it a column to split on, with the parameter `group_by`." 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": null, 897 | "metadata": {}, 898 | "outputs": [], 899 | "source": [ 900 | "# Acquire data using a built in query\n", 901 | "host_logons = qry_prov.WindowsSecurity.list_host_logons()\n", 902 | "\n", 903 | "# Display timeline\n", 904 | "tooltip_cols = [\"TimeGenerated\", \"Account\", \"LogonType\", \"TimeGenerated\"]\n", 905 | "nbdisplay.display_timeline(\n", 906 | " data=host_logons,\n", 907 | " title=\"Host Logons\",\n", 908 | " source_columns=tooltip_cols,\n", 909 | " group_by=\"LogonType\",\n", 910 | " height=200,\n", 911 | ")" 912 | ] 913 | }, 914 | { 915 | "cell_type": "markdown", 916 | "metadata": {}, 917 | "source": [ 918 | "When presented with a large number of events such as we have here it's useful to cluster these into a more manageable number of groups. MSTICpy contains [clustering features](https://msticpy.readthedocs.io/en/latest/msticpy.sectools.html?highlight=cluster_events#msticpy.sectools.eventcluster.dbcluster_events) that can be used against a number of data types. Once clustering is complete we use another [widget](https://msticpy.readthedocs.io/en/latest/msticpy.nbtools.html?highlight=SelectItem#msticpy.nbtools.nbwidgets.SelectItem) to let the user select the cluster they want to focus on." 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "metadata": {}, 925 | "outputs": [], 926 | "source": [ 927 | "from msticpy.analysis.eventcluster import (\n", 928 | " _string_score,\n", 929 | " add_process_features,\n", 930 | " dbcluster_events,\n", 931 | ")\n", 932 | "\n", 933 | "# Get data and convert some values into numericals\n", 934 | "logon_features = host_logons.copy()\n", 935 | "logon_features[\"AccountNum\"] = host_logons.apply(\n", 936 | " lambda x: _string_score(x.Account), axis=1\n", 937 | ")\n", 938 | "logon_features[\"TargetUserNum\"] = host_logons.apply(\n", 939 | " lambda x: _string_score(x.TargetUserName), axis=1\n", 940 | ")\n", 941 | "logon_features[\"LogonHour\"] = host_logons.apply(lambda x: x.TimeGenerated.hour, axis=1)\n", 942 | "\n", 943 | "# run clustering\n", 944 | "(clus_logons, _, _) = dbcluster_events(\n", 945 | " data=logon_features,\n", 946 | " time_column=\"TimeGenerated\",\n", 947 | " cluster_columns=[\"AccountNum\", \"LogonType\", \"TargetUserNum\"],\n", 948 | " max_cluster_distance=0.0001,\n", 949 | ")\n", 950 | "\n", 951 | "# Sort and format the clustering scores to group similar logon events into sessions\n", 952 | "dist_logons = clus_logons.sort_values(\"TimeGenerated\")[\n", 953 | " [\"TargetUserName\", \"TimeGenerated\", \"LastEventTime\", \"LogonType\", \"ClusterSize\"]\n", 954 | "]\n", 955 | "dist_logons = dist_logons.apply(\n", 956 | " lambda x: (\n", 957 | " f\"{x.TargetUserName}: \"\n", 958 | " f\"(logontype {x.LogonType}) \"\n", 959 | " f\"timerange: {x.TimeGenerated} - {x.LastEventTime} \"\n", 960 | " f\"count: {x.ClusterSize}\"\n", 961 | " ),\n", 962 | " axis=1,\n", 963 | ")\n", 964 | "# Extract the distinct sessions\n", 965 | "dist_logons = {v: k for k, v in dist_logons.to_dict().items()}\n", 966 | "\n", 967 | "\n", 968 | "def show_logon(idx):\n", 969 | " return nbdisplay.format_logon(pd.DataFrame(clus_logons.loc[idx]).T)\n", 970 | "\n", 971 | "\n", 972 | "# Display the sessions in a selection widget for later use\n", 973 | "logon_wgt = nbwidgets.SelectItem(\n", 974 | " description=\"Select logon cluster to 
examine\",\n", 975 | " item_dict=dist_logons,\n", 976 | " action=show_logon,\n", 977 | " height=\"200px\",\n", 978 | " width=\"100%\",\n", 979 | " auto_display=True,\n", 980 | ")" 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": {}, 986 | "source": [ 987 | "---\n", 988 | "\n", 989 | "

<b><h2>Summary</h2></b>

\n", 990 | "\n", 991 | "MSTICPy has many features, in the lab you have only just started to scratch the surface. We have many more features to explore.
\n", 992 | "In addition MSTICPy is a work in progress and we are very open to contributions, improvements, feedback, and feature requests from the community.\n", 993 | "\n", 994 | "---\n", 995 | "\n", 996 | "

<b><h2>Resources</h2></b>

\n", 997 | "\n", 998 | "MSTICPy Documentation - https://msticpy.readthedocs.io
\n", 999 | "GitHub repo - https://github.com/microsoft/msticpy
\n", 1000 | "Blog - https://msticpy.medium.com
\n", 1001 | "\n", 1002 | "Sample notebooks:\n", 1003 | "- https://github.com/microsoft/msticpy/tree/master/docs/notebooks\n", 1004 | "- https://github.com/Azure/Azure-Sentinel-Notebooks\n", 1005 | "\n", 1006 | "\n", 1007 | "

<b><h2>Contacts</h2></b>

\n", 1008 | "\n", 1009 | "MSTICPy is built and maintained by:\n", 1010 | "\n", 1015 | "\n", 1016 | "If you have any questions please reach out to us on [GitHub](https://github.com/microsoft/msticpy) or at:
\n", 1017 | "Email - msticpy@microsoft.com
\n", 1018 | "Twitter - [@msticpy](https://twitter.com/msticpy), [@ianhellen Ian Hellen](https://twitter.com/ianhellen), [@MSSPete (Pete Bryan)](https://twitter.com/MSSPete), [@AshwinPatil (Ashwin Patil)](https://twitter.com/ashwinpatil)
\n", 1019 | "GitHub - [@ianhelle](https://github.com/ianhelle), [@PeteBryan](https://github.com/petebryan), [@Ashwin-Patil](https://github.com/ashwin-patil)
\n", 1020 | "LinkedIn - [@ianhellen](https://www.linkedin.com/in/ianhellen/), [@PeteBryan](https://www.linkedin.com/in/peter-bryan-77588473/), [@AshwinPatil](https://www.linkedin.com/in/ashwinrp/)" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "markdown", 1025 | "metadata": {}, 1026 | "source": [] 1027 | } 1028 | ], 1029 | "metadata": { 1030 | "kernelspec": { 1031 | "display_name": "Python 3 (ipykernel)", 1032 | "language": "python", 1033 | "name": "python3" 1034 | }, 1035 | "language_info": { 1036 | "codemirror_mode": { 1037 | "name": "ipython", 1038 | "version": 3 1039 | }, 1040 | "file_extension": ".py", 1041 | "mimetype": "text/x-python", 1042 | "name": "python", 1043 | "nbconvert_exporter": "python", 1044 | "pygments_lexer": "ipython3", 1045 | "version": "3.8.12" 1046 | }, 1047 | "widgets": { 1048 | "application/vnd.jupyter.widget-state+json": { 1049 | "state": {}, 1050 | "version_major": 2, 1051 | "version_minor": 0 1052 | } 1053 | } 1054 | }, 1055 | "nbformat": 4, 1056 | "nbformat_minor": 4 1057 | } 1058 | --------------------------------------------------------------------------------