├── .gitattributes
├── Data
│   ├── Bro log from Threatglass
│   │   ├── HTML_Bro_log_1
│   │   │   ├── app_stats.html
│   │   │   ├── conn.html
│   │   │   ├── dhcp.html
│   │   │   ├── dns.html
│   │   │   ├── dpd.html
│   │   │   ├── files.html
│   │   │   ├── ftp.html
│   │   │   ├── http.html
│   │   │   ├── irc.html
│   │   │   ├── loaded_scripts.html
│   │   │   ├── notice.html
│   │   │   ├── packet_filter.html
│   │   │   ├── ssl.html
│   │   │   └── weird.html
│   │   ├── HTML_Bro_log_2
│   │   │   ├── app_stats.html
│   │   │   ├── conn.html
│   │   │   ├── dhcp.html
│   │   │   ├── dns.html
│   │   │   ├── dpd.html
│   │   │   ├── files.html
│   │   │   ├── ftp.html
│   │   │   ├── http.html
│   │   │   ├── irc.html
│   │   │   ├── loaded_scripts.html
│   │   │   ├── notice.html
│   │   │   ├── packet_filter.html
│   │   │   ├── ssl.html
│   │   │   └── weird.html
│   │   └── HTML_Bro_log_3
│   │       ├── app_stats.html
│   │       ├── conn.html
│   │       ├── dhcp.html
│   │       ├── dns.html
│   │       ├── dpd.html
│   │       ├── files.html
│   │       ├── http.html
│   │       ├── irc.html
│   │       ├── loaded_scripts.html
│   │       ├── notice.html
│   │       ├── packet_filter.html
│   │       ├── ssl.html
│   │       └── weird.html
│   ├── Bro logs from Threatglass datasets
│   │   └── Bro logs from Threatglass datasets.txt
│   ├── PE malware dataset description
│   │   ├── OPCleaver.html
│   │   ├── VirusShare.html
│   │   └── Zeus.html
│   ├── PE malware datasets
│   │   └── PE malware datasets.txt
│   ├── System datasets description
│   │   └── auth.html
│   ├── System datasets
│   │   └── System datasets.txt
│   ├── network datasets description
│   │   ├── dhcp.html
│   │   ├── dns.html
│   │   ├── files.html
│   │   ├── ftp.html
│   │   ├── http.html
│   │   ├── notice.html
│   │   ├── smtp.html
│   │   ├── ssh.html
│   │   ├── ssl.html
│   │   ├── tunnel.html
│   │   └── weird.html
│   └── network datasets
│       └── network datasets.txt
├── Data_analysis
│   ├── Bro Logs from Threatglass
│   │   ├── Part 1
│   │   │   ├── app_stats analysis.ipynb
│   │   │   ├── conn analysis.ipynb
│   │   │   ├── dhcp analysis.ipynb
│   │   │   ├── dns analysis.ipynb
│   │   │   ├── dpd analysis.ipynb
│   │   │   ├── files analysis.ipynb
│   │   │   ├── ftp analysis.ipynb
│   │   │   ├── http analysis.ipynb
│   │   │   ├── irc analysis.ipynb
│   │   │   ├── loaded_scripts analysis.ipynb
│   │   │   ├── notice analysis.ipynb
│   │   │   ├── packet_filter analysis.ipynb
│   │   │   ├── ssl analysis.ipynb
│   │   │   └── weird analysis.ipynb
│   │   ├── Part 2
│   │   │   ├── app_stats analysis.ipynb
│   │   │   ├── conn analysis.ipynb
│   │   │   ├── dhcpanalysis.ipynb
│   │   │   ├── dns analysis.ipynb
│   │   │   ├── dpd analysis.ipynb
│   │   │   ├── files analysis.ipynb
│   │   │   ├── ftp analysis.ipynb
│   │   │   ├── http analysis.ipynb
│   │   │   ├── irc analysis.ipynb
│   │   │   ├── loaded_scripts analysis.ipynb
│   │   │   ├── notice analysis.ipynb
│   │   │   ├── packet_filter analysis.ipynb
│   │   │   ├── ssl analysis.ipynb
│   │   │   └── weird analysis.ipynb
│   │   └── Part 3
│   │       ├── app_stats analysis.ipynb
│   │       ├── conn analysis.ipynb
│   │       ├── dhcp analysis.ipynb
│   │       ├── dns analysis.ipynb
│   │       ├── dpd analysis.ipynb
│   │       ├── files analysis.ipynb
│   │       ├── http analysis.ipynb
│   │       ├── irc analysis.ipynb
│   │       ├── loaded_scripts analysis.ipynb
│   │       ├── notice analysis.ipynb
│   │       ├── packet_filter analysis.ipynb
│   │       ├── ssl analysis.ipynb
│   │       └── weird analysis.ipynb
│   ├── Network analysis
│   │   ├── Dhcp_analysis_practice_2.ipynb
│   │   ├── dhcp analysis.ipynb
│   │   ├── dns analysis.ipynb
│   │   ├── ftp analysis.ipynb
│   │   ├── notice analysis.ipynb
│   │   ├── smtp analysis.ipynb
│   │   ├── ssh analysis.ipynb
│   │   ├── ssl analysis.ipynb
│   │   ├── tunnel analysis.ipynb
│   │   └── weird analysis.ipynb
│   ├── PE malware analysis
│   │   ├── Malware_analysis_practice.ipynb
│   │   ├── OP Cleaver Analysis.ipynb
│   │   ├── VirusShare Analysis-checkpoint.ipynb
│   │   └── Zeus Analysis.ipynb
│   └── System analysis
│       └── auth analysis.ipynb
├── Machine_learning_practice
│   └── machine_learning.py
├── README.md
└── Scripts
    ├── LogToCsv.py
    ├── LogtoCsvConverter.py
    ├── NetworkLogToCSV.py
    ├── System_Squid_LogToCSV.py
    ├── featureExtraction.py
    └── html_Generator.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.csv filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/app_stats.html:
--------------------------------------------------------------------------------
 1 | APP_STATS
Download: app_stats Zip File
Abstract
| Number of Instances: | 942 | Security Area: | Network Protocols |
|---|---|---|---|
| Number of Attributes: | 6 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Attribute Information
- ts: Timestamp of request
- ts_delta: Time difference from previous measurement
- app: Name of application (YouTube, Netflix, etc.)
- uniq_hosts: Number of unique hosts that used app
- hits: Number of visits to app
- bytes: Total bytes transferred to/from app
|            | Data Type | Count | Unique Values | Missing Values |
|------------|-----------|-------|---------------|----------------|
| ts         | float64   | 942   | 798           | 0              |
| ts_delta   | float64   | 942   | 1             | 0              |
| app        | object    | 942   | 3             | 0              |
| uniq_hosts | int64     | 942   | 1             | 0              |
| hits       | int64     | 942   | 47            | 0              |
| bytes      | int64     | 942   | 770           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%201/app_stats%20analysis.ipynb
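The attribute list above maps directly onto a small pandas workflow. Below is a minimal sketch, not part of the repository, that loads the same app_stats.csv file the associated notebook reads and summarises traffic per application; it assumes the ts column holds Unix epoch seconds, which is the usual Bro convention.

```python
import pandas as pd

# Load the app_stats log exported to CSV (same file the associated notebook reads).
df = pd.read_csv("app_stats.csv")

# ts is stored as float64; assuming it is a Unix epoch in seconds (the usual Bro convention).
df["ts"] = pd.to_datetime(df["ts"], unit="s")

# Per-application totals: bytes transferred, hits, and the peak unique-host count.
summary = df.groupby("app").agg(
    total_bytes=("bytes", "sum"),
    total_hits=("hits", "sum"),
    peak_uniq_hosts=("uniq_hosts", "max"),
)
print(summary.sort_values("total_bytes", ascending=False))
```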
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/conn.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/conn.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/dhcp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/dhcp.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/dns.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/dns.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/dpd.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/dpd.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/files.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/files.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/ftp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/ftp.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/http.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/http.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/irc.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/irc.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/loaded_scripts.html:
--------------------------------------------------------------------------------
 1 | LOADED_SCRIPTS
Download: loaded_scripts Zip File
Abstract
| Number of Instances: | 647250 | Security Area: | Bro Diagnostics |
|---|---|---|---|
| Number of Attributes: | 1 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Log the loaded scripts.
Attribute Information
- name: A list of scripts that were loaded at startup
|      | Data Type | Count  | Unique Values | Missing Values |
|------|-----------|--------|---------------|----------------|
| name | object    | 647250 | 265           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%201/loaded_scripts%20analysis.ipynb
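Because this log has only the single name column, the natural first exploration is a frequency count of the loaded scripts. A minimal sketch, assuming the loaded_scripts.csv file read by the associated notebook:

```python
import pandas as pd

# Load the loaded_scripts log (single 'name' column, one row per script load).
df = pd.read_csv("loaded_scripts.csv")

# 265 distinct scripts across 647,250 rows: list the most frequently loaded ones.
print(df["name"].value_counts().head(10))
```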
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/notice.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/notice.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/packet_filter.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/packet_filter.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/ssl.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/ssl.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_1/weird.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_1/weird.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/app_stats.html:
--------------------------------------------------------------------------------
 1 | APP_STATS
Download: app_stats Zip File
Abstract
| Number of Instances: | 594 | Security Area: | Network Protocols |
|---|---|---|---|
| Number of Attributes: | 6 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Attribute Information
- ts: Timestamp of request
- ts_delta: Time difference from previous measurement
- app: Name of application (YouTube, Netflix, etc.)
- uniq_hosts: Number of unique hosts that used app
- hits: Number of visits to app
- bytes: Total bytes transferred to/from app
|            | Data Type | Count | Unique Values | Missing Values |
|------------|-----------|-------|---------------|----------------|
| ts         | float64   | 594   | 498           | 0              |
| ts_delta   | float64   | 594   | 1             | 0              |
| app        | object    | 594   | 3             | 0              |
| uniq_hosts | int64     | 594   | 1             | 0              |
| hits       | int64     | 594   | 40            | 0              |
| bytes      | int64     | 594   | 493           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%202/app_stats%20analysis.ipynb
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/conn.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/conn.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/dhcp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/dhcp.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/dns.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/dns.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/dpd.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/dpd.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/files.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/files.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/ftp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/ftp.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/http.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/http.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/irc.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/irc.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/loaded_scripts.html:
--------------------------------------------------------------------------------
 1 | LOADED_SCRIPTS
Download: loaded_scripts Zip File
Abstract
| Number of Instances: | 423000 | Security Area: | Bro Diagnostics |
|---|---|---|---|
| Number of Attributes: | 1 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Log the loaded scripts.
Attribute Information
- name: A list of scripts that were loaded at startup
|      | Data Type | Count  | Unique Values | Missing Values |
|------|-----------|--------|---------------|----------------|
| name | object    | 423000 | 261           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%202/loaded_scripts%20analysis.ipynb
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/notice.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/notice.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/packet_filter.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/packet_filter.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/ssl.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/ssl.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_2/weird.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_2/weird.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/app_stats.html:
--------------------------------------------------------------------------------
 1 | APP_STATS
Download: app_stats Zip File
Abstract
| Number of Instances: | 878 | Security Area: | Network Protocols |
|---|---|---|---|
| Number of Attributes: | 6 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Attribute Information
- ts: Timestamp of request
- ts_delta: Time difference from previous measurement
- app: Name of application (YouTube, Netflix, etc.)
- uniq_hosts: Number of unique hosts that used app
- hits: Number of visits to app
- bytes: Total bytes transferred to/from app
|            | Data Type | Count | Unique Values | Missing Values |
|------------|-----------|-------|---------------|----------------|
| ts         | float64   | 878   | 727           | 0              |
| ts_delta   | float64   | 878   | 1             | 0              |
| app        | object    | 878   | 3             | 0              |
| uniq_hosts | int64     | 878   | 1             | 0              |
| hits       | int64     | 878   | 46            | 0              |
| bytes      | int64     | 878   | 715           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%203/app_stats%20analysis.ipynb
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/conn.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/conn.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/dhcp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/dhcp.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/dns.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/dns.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/dpd.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/dpd.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/files.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/files.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/http.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/http.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/irc.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/irc.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/loaded_scripts.html:
--------------------------------------------------------------------------------
 1 | LOADED_SCRIPTS
Download: loaded_scripts Zip File
Abstract
| Number of Instances: | 595500 | Security Area: | Bro Diagnostics |
|---|---|---|---|
| Number of Attributes: | 1 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
Log the loaded scripts.
Attribute Information
- name: A list of scripts that were loaded at startup
|      | Data Type | Count  | Unique Values | Missing Values |
|------|-----------|--------|---------------|----------------|
| name | object    | 595500 | 265           | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%203/loaded_scripts%20analysis.ipynb
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/notice.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/notice.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/packet_filter.html:
--------------------------------------------------------------------------------
 1 | PACKET_FILTER
Download: packet_filter Zip File
Abstract
| Number of Instances: | 2382 | Security Area: | Bro Diagnostics |
|---|---|---|---|
| Number of Attributes: | 5 | Date Donated: | 2012 |
| Missing Values? | - | Associated ML Tasks: | Network Analysis |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
This script supports how Bro sets its BPF capture filter. By default Bro sets a capture filter that allows all traffic. If a filter is set on the command line, that filter takes precedence over the default open filter and all filters defined in Bro scripts with the capture_filters and restrict_filters variables.
Attribute Information
- ts: Timestamp of request
- node
- filter
- init
- success
|         | Data Type | Count | Unique Values | Missing Values |
|---------|-----------|-------|---------------|----------------|
| ts      | float64   | 2382  | 2382          | 0              |
| node    | object    | 2382  | 1             | 0              |
| filter  | object    | 2382  | 1             | 0              |
| init    | object    | 2382  | 1             | 0              |
| success | object    | 2382  | 1             | 0              |
Relevant Papers
Bro Logs http://gauss.ececs.uc.edu/Courses/c6055/pdf/bro_log_vars.pdf
Neise, Patrick. "Intrusion Detection Through Relationship Analysis". Oct 2016 https://www.sans.org/reading-room/whitepapers/detection/intrusion-detection-relationship-analysis-37352
Frances Bernadette C. De Ocampo, Trisha Mari L. Del Castillo, Miguel Alberto N. Gomez. "AUTOMATED SIGNATURE CREATOR FOR A SIGNATURE BASED INTRUSION DETECTION SYSTEM WITH NETWORK ATTACK DETECTION CAPABILITIES". 2013 http://sdiwc.net/digital-library/automated-signature-creator-for-a-signature-based-intrusion-detection-system-with-network-attack-detection-capabilities-pancakes.html
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/Bro%20Logs%20from%20Threatglass/Part%203/packet_filter%20analysis.ipynb
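The dataset description above says the capture filter stays at Bro's default open filter unless something overrides it, and the column profile (one unique value each for node, filter, init and success) suggests exactly that. A minimal sketch to check it against the packet_filter.csv file read by the associated notebook:

```python
import pandas as pd

# Load the packet_filter log (same file the associated notebook reads).
df = pd.read_csv("packet_filter.csv")

# The profile above shows a single unique value for node, filter, init and success,
# i.e. one filter installed on one node, the same way every time.
print(df["filter"].unique())           # the BPF expression(s) actually installed
print(df["success"].value_counts())    # whether each installation succeeded
print(df["init"].value_counts())       # whether it was set at initialization
```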
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/ssl.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/ssl.html
--------------------------------------------------------------------------------
/Data/Bro log from Threatglass/HTML_Bro_log_3/weird.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/Bro log from Threatglass/HTML_Bro_log_3/weird.html
--------------------------------------------------------------------------------
/Data/Bro logs from Threatglass datasets/Bro logs from Threatglass datasets.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/drive/folders/1P9dC1WXEUrypY0Y9lWnVZgFB1S5ZZ6jd
--------------------------------------------------------------------------------
/Data/PE malware dataset description/OPCleaver.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/PE malware dataset description/OPCleaver.html
--------------------------------------------------------------------------------
/Data/PE malware dataset description/VirusShare.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/PE malware dataset description/VirusShare.html
--------------------------------------------------------------------------------
/Data/PE malware dataset description/Zeus.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/PE malware dataset description/Zeus.html
--------------------------------------------------------------------------------
/Data/PE malware datasets/PE malware datasets.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/drive/folders/1u-AFeS8Dctz5vP7Ohdq82npuANwPOoRk
--------------------------------------------------------------------------------
/Data/System datasets description/auth.html:
--------------------------------------------------------------------------------
 1 | AUTH
Download: auth Zip File
Abstract
| Number of Instances: | 86839 | Security Area: | System Analysis |
|---|---|---|---|
| Number of Attributes: | 4 | Date Donated: | 2014 |
| Missing Values? | - | Associated ML Tasks: | Network Categorize |
Source
Mike Sconzo
Security Repository
Secrepo.com
Dataset Information
-
Attribute Information
- ts: A Unix timestamp as UTC seconds with a millisecond resolution
- ip: IP address involved in the login attempts
- daemon: sshd is a daemon that serves incoming SSH connections
- message: result of the connection
|         | Data Type | Count | Unique Values | Missing Values |
|---------|-----------|-------|---------------|----------------|
| ts      | object    | 86839 | 23140         | 0              |
| ip      | object    | 86839 | 27895         | 0              |
| daemon  | object    | 86839 | 18683         | 0              |
| message | object    | 86839 | 47907         | 0              |
Relevant Papers
Associated Data Science Notebook
https://github.com/cyberdefenders/MachineLearning/blob/master/Data_analysis/System%20analysis/auth%20analysis.ipynb
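The four attributes describe a raw sshd authentication log, so a common first step is to separate failed from successful attempts and rank the source IPs. A minimal sketch under assumptions not stated on this page: the export is named auth.csv, the ts strings are plain numeric epoch values, and failed attempts contain the phrase "Failed password" in the message column, as is typical for sshd logs.

```python
import pandas as pd

# Assumption: the CSV export is named auth.csv; column names follow the attribute list above.
df = pd.read_csv("auth.csv")

# ts is described as UTC seconds with millisecond resolution but is stored as a string,
# so parse it as a numeric epoch (assumes the strings are plain numbers).
df["ts"] = pd.to_datetime(df["ts"].astype(float), unit="s", utc=True)

# Assumption: failed sshd attempts contain the phrase "Failed password" in message.
df["failed"] = df["message"].str.contains("Failed password", na=False)

# Source IPs with the most failed attempts are brute-force candidates.
print(df.loc[df["failed"], "ip"].value_counts().head(10))
```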
--------------------------------------------------------------------------------
/Data/System datasets/System datasets.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/drive/folders/1g8I08WAe0HaIl5nfZH1X-kIC5ZQwAUOu
--------------------------------------------------------------------------------
/Data/network datasets description/dhcp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/dhcp.html
--------------------------------------------------------------------------------
/Data/network datasets description/dns.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/dns.html
--------------------------------------------------------------------------------
/Data/network datasets description/files.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/files.html
--------------------------------------------------------------------------------
/Data/network datasets description/ftp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/ftp.html
--------------------------------------------------------------------------------
/Data/network datasets description/http.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/http.html
--------------------------------------------------------------------------------
/Data/network datasets description/notice.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/notice.html
--------------------------------------------------------------------------------
/Data/network datasets description/smtp.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/smtp.html
--------------------------------------------------------------------------------
/Data/network datasets description/ssh.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/ssh.html
--------------------------------------------------------------------------------
/Data/network datasets description/ssl.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/ssl.html
--------------------------------------------------------------------------------
/Data/network datasets description/tunnel.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/tunnel.html
--------------------------------------------------------------------------------
/Data/network datasets description/weird.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyberdefendersprogram/MachineLearning/56f4551433e091bcd8b185df7fc0048ba3a7bf00/Data/network datasets description/weird.html
--------------------------------------------------------------------------------
/Data/network datasets/network datasets.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/open?id=1qBmJhVqPprD-esGKgtm6VLn_YOpjawJ-
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/app_stats analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"app_stats.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 942 | \n",
60 | " 798 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " ts_delta | \n",
65 | " float64 | \n",
66 | " 942 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " app | \n",
72 | " object | \n",
73 | " 942 | \n",
74 | " 3 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " uniq_hosts | \n",
79 | " int64 | \n",
80 | " 942 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " hits | \n",
86 | " int64 | \n",
87 | " 942 | \n",
88 | " 47 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " bytes | \n",
93 | " int64 | \n",
94 | " 942 | \n",
95 | " 770 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | "
\n",
100 | "
"
101 | ],
102 | "text/plain": [
103 | " Data Type Count Unique Values Missing Values\n",
104 | "ts float64 942 798 0\n",
105 | "ts_delta float64 942 1 0\n",
106 | "app object 942 3 0\n",
107 | "uniq_hosts int64 942 1 0\n",
108 | "hits int64 942 47 0\n",
109 | "bytes int64 942 770 0"
110 | ]
111 | },
112 | "execution_count": 2,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "#DataFrame with columns\n",
119 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
120 | "\n",
121 | "#DataFrame with data types\n",
122 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
123 | "\n",
124 | "#DataFrame with Count\n",
125 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
126 | "\n",
127 | "#DataFrame with unique values\n",
128 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
129 | "for v in list(df.columns.values):\n",
130 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
131 | "\n",
132 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
133 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
134 | "print('Data Quality Report')\n",
135 | "data_quality_report"
136 | ]
137 | }
138 | ],
139 | "metadata": {
140 | "kernelspec": {
141 | "display_name": "Python 3",
142 | "language": "python",
143 | "name": "python3"
144 | },
145 | "language_info": {
146 | "codemirror_mode": {
147 | "name": "ipython",
148 | "version": 3
149 | },
150 | "file_extension": ".py",
151 | "mimetype": "text/x-python",
152 | "name": "python",
153 | "nbconvert_exporter": "python",
154 | "pygments_lexer": "ipython3",
155 | "version": "3.6.5"
156 | }
157 | },
158 | "nbformat": 4,
159 | "nbformat_minor": 2
160 | }
161 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/dhcp analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dhcp.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 5206 | \n",
60 | " 5206 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 5206 | \n",
67 | " 2601 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 5206 | \n",
74 | " 128 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 5206 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 5206 | \n",
88 | " 128 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 5206 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " mac | \n",
100 | " object | \n",
101 | " 5206 | \n",
102 | " 128 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " assigned_ip | \n",
107 | " object | \n",
108 | " 5206 | \n",
109 | " 128 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " lease_time | \n",
114 | " float64 | \n",
115 | " 5206 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " trans_id | \n",
121 | " int64 | \n",
122 | " 5206 | \n",
123 | " 5206 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 5206 5206 0\n",
133 | "uid object 5206 2601 0\n",
134 | "id.orig_h object 5206 128 0\n",
135 | "id.orig_p int64 5206 1 0\n",
136 | "id.resp_h object 5206 128 0\n",
137 | "id.resp_p int64 5206 1 0\n",
138 | "mac object 5206 128 0\n",
139 | "assigned_ip object 5206 128 0\n",
140 | "lease_time float64 5206 1 0\n",
141 | "trans_id int64 5206 5206 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/dpd analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 5,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dpd.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 6,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 88 | \n",
60 | " 88 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 88 | \n",
67 | " 88 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 88 | \n",
74 | " 31 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 88 | \n",
81 | " 65 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 88 | \n",
88 | " 30 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 88 | \n",
95 | " 6 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " proto | \n",
100 | " object | \n",
101 | " 88 | \n",
102 | " 1 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " analyzer | \n",
107 | " object | \n",
108 | " 88 | \n",
109 | " 2 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " failure_reason | \n",
114 | " object | \n",
115 | " 88 | \n",
116 | " 4 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | "
\n",
121 | "
"
122 | ],
123 | "text/plain": [
124 | " Data Type Count Unique Values Missing Values\n",
125 | "ts float64 88 88 0\n",
126 | "uid object 88 88 0\n",
127 | "id.orig_h object 88 31 0\n",
128 | "id.orig_p int64 88 65 0\n",
129 | "id.resp_h object 88 30 0\n",
130 | "id.resp_p int64 88 6 0\n",
131 | "proto object 88 1 0\n",
132 | "analyzer object 88 2 0\n",
133 | "failure_reason object 88 4 0"
134 | ]
135 | },
136 | "execution_count": 6,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "#DataFrame with columns\n",
143 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
144 | "\n",
145 | "#DataFrame with data types\n",
146 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
147 | "\n",
148 | "#DataFrame with Count\n",
149 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
150 | "\n",
151 | "#DataFrame with unique values\n",
152 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
153 | "for v in list(df.columns.values):\n",
154 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
155 | "\n",
156 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
157 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
158 | "print('Data Quality Report')\n",
159 | "data_quality_report"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": []
168 | }
169 | ],
170 | "metadata": {
171 | "kernelspec": {
172 | "display_name": "Python 3",
173 | "language": "python",
174 | "name": "python3"
175 | },
176 | "language_info": {
177 | "codemirror_mode": {
178 | "name": "ipython",
179 | "version": 3
180 | },
181 | "file_extension": ".py",
182 | "mimetype": "text/x-python",
183 | "name": "python",
184 | "nbconvert_exporter": "python",
185 | "pygments_lexer": "ipython3",
186 | "version": "3.6.5"
187 | }
188 | },
189 | "nbformat": 4,
190 | "nbformat_minor": 2
191 | }
192 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/irc analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"irc.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 30 | \n",
60 | " 20 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 30 | \n",
67 | " 8 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 30 | \n",
74 | " 7 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 30 | \n",
81 | " 7 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 30 | \n",
88 | " 7 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 30 | \n",
95 | " 7 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " nick | \n",
100 | " object | \n",
101 | " 22 | \n",
102 | " 8 | \n",
103 | " 8 | \n",
104 | "
\n",
105 | " \n",
106 | " user | \n",
107 | " object | \n",
108 | " 14 | \n",
109 | " 6 | \n",
110 | " 16 | \n",
111 | "
\n",
112 | " \n",
113 | " command | \n",
114 | " object | \n",
115 | " 30 | \n",
116 | " 3 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " value | \n",
121 | " object | \n",
122 | " 30 | \n",
123 | " 20 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | " addl | \n",
128 | " object | \n",
129 | " 22 | \n",
130 | " 13 | \n",
131 | " 8 | \n",
132 | "
\n",
133 | " \n",
134 | " dcc_file_name | \n",
135 | " float64 | \n",
136 | " 0 | \n",
137 | " 0 | \n",
138 | " 30 | \n",
139 | "
\n",
140 | " \n",
141 | " dcc_file_size | \n",
142 | " float64 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 30 | \n",
146 | "
\n",
147 | " \n",
148 | " dcc_mime_type | \n",
149 | " float64 | \n",
150 | " 0 | \n",
151 | " 0 | \n",
152 | " 30 | \n",
153 | "
\n",
154 | " \n",
155 | " fuid | \n",
156 | " float64 | \n",
157 | " 0 | \n",
158 | " 0 | \n",
159 | " 30 | \n",
160 | "
\n",
161 | " \n",
162 | "
\n",
163 | "
"
164 | ],
165 | "text/plain": [
166 | " Data Type Count Unique Values Missing Values\n",
167 | "ts float64 30 20 0\n",
168 | "uid object 30 8 0\n",
169 | "id.orig_h object 30 7 0\n",
170 | "id.orig_p int64 30 7 0\n",
171 | "id.resp_h object 30 7 0\n",
172 | "id.resp_p int64 30 7 0\n",
173 | "nick object 22 8 8\n",
174 | "user object 14 6 16\n",
175 | "command object 30 3 0\n",
176 | "value object 30 20 0\n",
177 | "addl object 22 13 8\n",
178 | "dcc_file_name float64 0 0 30\n",
179 | "dcc_file_size float64 0 0 30\n",
180 | "dcc_mime_type float64 0 0 30\n",
181 | "fuid float64 0 0 30"
182 | ]
183 | },
184 | "execution_count": 2,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "#DataFrame with columns\n",
191 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
192 | "\n",
193 | "#DataFrame with data types\n",
194 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
195 | "\n",
196 | "#DataFrame with Count\n",
197 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
198 | "\n",
199 | "#DataFrame with unique values\n",
200 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
201 | "for v in list(df.columns.values):\n",
202 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
203 | "\n",
204 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
205 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
206 | "print('Data Quality Report')\n",
207 | "data_quality_report"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": []
216 | }
217 | ],
218 | "metadata": {
219 | "kernelspec": {
220 | "display_name": "Python 3",
221 | "language": "python",
222 | "name": "python3"
223 | },
224 | "language_info": {
225 | "codemirror_mode": {
226 | "name": "ipython",
227 | "version": 3
228 | },
229 | "file_extension": ".py",
230 | "mimetype": "text/x-python",
231 | "name": "python",
232 | "nbconvert_exporter": "python",
233 | "pygments_lexer": "ipython3",
234 | "version": "3.6.5"
235 | }
236 | },
237 | "nbformat": 4,
238 | "nbformat_minor": 2
239 | }
240 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/loaded_scripts analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"loaded_scripts.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " name | \n",
58 | " object | \n",
59 | " 647250 | \n",
60 | " 265 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | "
\n",
65 | "
"
66 | ],
67 | "text/plain": [
68 | " Data Type Count Unique Values Missing Values\n",
69 | "name object 647250 265 0"
70 | ]
71 | },
72 | "execution_count": 2,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "#DataFrame with columns\n",
79 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
80 | "\n",
81 | "#DataFrame with data types\n",
82 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
83 | "\n",
84 | "#DataFrame with Count\n",
85 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
86 | "\n",
87 | "#DataFrame with unique values\n",
88 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
89 | "for v in list(df.columns.values):\n",
90 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
91 | "\n",
92 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
93 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
94 | "print('Data Quality Report')\n",
95 | "data_quality_report"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python 3",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.6.5"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 2
127 | }
128 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/packet_filter analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"packet_filter.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 2589 | \n",
60 | " 2589 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " node | \n",
65 | " object | \n",
66 | " 2589 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " filter | \n",
72 | " object | \n",
73 | " 2589 | \n",
74 | " 1 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " init | \n",
79 | " object | \n",
80 | " 2589 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " success | \n",
86 | " object | \n",
87 | " 2589 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | "
\n",
93 | "
"
94 | ],
95 | "text/plain": [
96 | " Data Type Count Unique Values Missing Values\n",
97 | "ts float64 2589 2589 0\n",
98 | "node object 2589 1 0\n",
99 | "filter object 2589 1 0\n",
100 | "init object 2589 1 0\n",
101 | "success object 2589 1 0"
102 | ]
103 | },
104 | "execution_count": 2,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "#DataFrame with columns\n",
111 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
112 | "\n",
113 | "#DataFrame with data types\n",
114 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
115 | "\n",
116 | "#DataFrame with Count\n",
117 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
118 | "\n",
119 | "#DataFrame with unique values\n",
120 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
121 | "for v in list(df.columns.values):\n",
122 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
123 | "\n",
124 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
125 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
126 | "print('Data Quality Report')\n",
127 | "data_quality_report"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python 3",
141 | "language": "python",
142 | "name": "python3"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.6.5"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 1/weird analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"weird.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 14005 | \n",
60 | " 13892 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 11416 | \n",
67 | " 11074 | \n",
68 | " 2589 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 11416 | \n",
74 | " 135 | \n",
75 | " 2589 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " float64 | \n",
80 | " 11416 | \n",
81 | " 560 | \n",
82 | " 2589 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 11416 | \n",
88 | " 1021 | \n",
89 | " 2589 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " float64 | \n",
94 | " 11416 | \n",
95 | " 24 | \n",
96 | " 2589 | \n",
97 | "
\n",
98 | " \n",
99 | " name | \n",
100 | " object | \n",
101 | " 14005 | \n",
102 | " 30 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " addl | \n",
107 | " object | \n",
108 | " 2 | \n",
109 | " 2 | \n",
110 | " 14003 | \n",
111 | "
\n",
112 | " \n",
113 | " notice | \n",
114 | " object | \n",
115 | " 14005 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " peer | \n",
121 | " object | \n",
122 | " 14005 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 14005 13892 0\n",
133 | "uid object 11416 11074 2589\n",
134 | "id.orig_h object 11416 135 2589\n",
135 | "id.orig_p float64 11416 560 2589\n",
136 | "id.resp_h object 11416 1021 2589\n",
137 | "id.resp_p float64 11416 24 2589\n",
138 | "name object 14005 30 0\n",
139 | "addl object 2 2 14003\n",
140 | "notice object 14005 1 0\n",
141 | "peer object 14005 1 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/app_stats analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"app_stats.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 594 | \n",
60 | " 498 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " ts_delta | \n",
65 | " float64 | \n",
66 | " 594 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " app | \n",
72 | " object | \n",
73 | " 594 | \n",
74 | " 3 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " uniq_hosts | \n",
79 | " int64 | \n",
80 | " 594 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " hits | \n",
86 | " int64 | \n",
87 | " 594 | \n",
88 | " 40 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " bytes | \n",
93 | " int64 | \n",
94 | " 594 | \n",
95 | " 493 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | "
\n",
100 | "
"
101 | ],
102 | "text/plain": [
103 | " Data Type Count Unique Values Missing Values\n",
104 | "ts float64 594 498 0\n",
105 | "ts_delta float64 594 1 0\n",
106 | "app object 594 3 0\n",
107 | "uniq_hosts int64 594 1 0\n",
108 | "hits int64 594 40 0\n",
109 | "bytes int64 594 493 0"
110 | ]
111 | },
112 | "execution_count": 2,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "#DataFrame with columns\n",
119 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
120 | "\n",
121 | "#DataFrame with data types\n",
122 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
123 | "\n",
124 | "#DataFrame with Count\n",
125 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
126 | "\n",
127 | "#DataFrame with unique values\n",
128 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
129 | "for v in list(df.columns.values):\n",
130 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
131 | "\n",
132 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
133 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
134 | "print('Data Quality Report')\n",
135 | "data_quality_report"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": []
144 | }
145 | ],
146 | "metadata": {
147 | "kernelspec": {
148 | "display_name": "Python 3",
149 | "language": "python",
150 | "name": "python3"
151 | },
152 | "language_info": {
153 | "codemirror_mode": {
154 | "name": "ipython",
155 | "version": 3
156 | },
157 | "file_extension": ".py",
158 | "mimetype": "text/x-python",
159 | "name": "python",
160 | "nbconvert_exporter": "python",
161 | "pygments_lexer": "ipython3",
162 | "version": "3.6.5"
163 | }
164 | },
165 | "nbformat": 4,
166 | "nbformat_minor": 2
167 | }
168 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/dhcpanalysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dhcp.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 3399 | \n",
60 | " 3399 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 3399 | \n",
67 | " 1699 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 3399 | \n",
74 | " 128 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 3399 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 3399 | \n",
88 | " 128 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 3399 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " mac | \n",
100 | " object | \n",
101 | " 3399 | \n",
102 | " 128 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " assigned_ip | \n",
107 | " object | \n",
108 | " 3399 | \n",
109 | " 128 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " lease_time | \n",
114 | " float64 | \n",
115 | " 3399 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " trans_id | \n",
121 | " int64 | \n",
122 | " 3399 | \n",
123 | " 3399 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 3399 3399 0\n",
133 | "uid object 3399 1699 0\n",
134 | "id.orig_h object 3399 128 0\n",
135 | "id.orig_p int64 3399 1 0\n",
136 | "id.resp_h object 3399 128 0\n",
137 | "id.resp_p int64 3399 1 0\n",
138 | "mac object 3399 128 0\n",
139 | "assigned_ip object 3399 128 0\n",
140 | "lease_time float64 3399 1 0\n",
141 | "trans_id int64 3399 3399 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/dpd analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dpd.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 49 | \n",
60 | " 49 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 49 | \n",
67 | " 49 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 49 | \n",
74 | " 11 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 49 | \n",
81 | " 42 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 49 | \n",
88 | " 16 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 49 | \n",
95 | " 4 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " proto | \n",
100 | " object | \n",
101 | " 49 | \n",
102 | " 1 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " analyzer | \n",
107 | " object | \n",
108 | " 49 | \n",
109 | " 1 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " failure_reason | \n",
114 | " object | \n",
115 | " 49 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | "
\n",
121 | "
"
122 | ],
123 | "text/plain": [
124 | " Data Type Count Unique Values Missing Values\n",
125 | "ts float64 49 49 0\n",
126 | "uid object 49 49 0\n",
127 | "id.orig_h object 49 11 0\n",
128 | "id.orig_p int64 49 42 0\n",
129 | "id.resp_h object 49 16 0\n",
130 | "id.resp_p int64 49 4 0\n",
131 | "proto object 49 1 0\n",
132 | "analyzer object 49 1 0\n",
133 | "failure_reason object 49 1 0"
134 | ]
135 | },
136 | "execution_count": 2,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "#DataFrame with columns\n",
143 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
144 | "\n",
145 | "#DataFrame with data types\n",
146 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
147 | "\n",
148 | "#DataFrame with Count\n",
149 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
150 | "\n",
151 | "#DataFrame with unique values\n",
152 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
153 | "for v in list(df.columns.values):\n",
154 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
155 | "\n",
156 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
157 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
158 | "print('Data Quality Report')\n",
159 | "data_quality_report"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": []
168 | }
169 | ],
170 | "metadata": {
171 | "kernelspec": {
172 | "display_name": "Python 3",
173 | "language": "python",
174 | "name": "python3"
175 | },
176 | "language_info": {
177 | "codemirror_mode": {
178 | "name": "ipython",
179 | "version": 3
180 | },
181 | "file_extension": ".py",
182 | "mimetype": "text/x-python",
183 | "name": "python",
184 | "nbconvert_exporter": "python",
185 | "pygments_lexer": "ipython3",
186 | "version": "3.6.5"
187 | }
188 | },
189 | "nbformat": 4,
190 | "nbformat_minor": 2
191 | }
192 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/irc analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"irc.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 6 | \n",
60 | " 4 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 6 | \n",
67 | " 2 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 6 | \n",
74 | " 2 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 6 | \n",
81 | " 2 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 6 | \n",
88 | " 2 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 6 | \n",
95 | " 2 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " nick | \n",
100 | " object | \n",
101 | " 4 | \n",
102 | " 2 | \n",
103 | " 2 | \n",
104 | "
\n",
105 | " \n",
106 | " user | \n",
107 | " object | \n",
108 | " 2 | \n",
109 | " 2 | \n",
110 | " 4 | \n",
111 | "
\n",
112 | " \n",
113 | " command | \n",
114 | " object | \n",
115 | " 6 | \n",
116 | " 3 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " value | \n",
121 | " object | \n",
122 | " 6 | \n",
123 | " 6 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | " addl | \n",
128 | " object | \n",
129 | " 4 | \n",
130 | " 4 | \n",
131 | " 2 | \n",
132 | "
\n",
133 | " \n",
134 | " dcc_file_name | \n",
135 | " float64 | \n",
136 | " 0 | \n",
137 | " 0 | \n",
138 | " 6 | \n",
139 | "
\n",
140 | " \n",
141 | " dcc_file_size | \n",
142 | " float64 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 6 | \n",
146 | "
\n",
147 | " \n",
148 | " dcc_mime_type | \n",
149 | " float64 | \n",
150 | " 0 | \n",
151 | " 0 | \n",
152 | " 6 | \n",
153 | "
\n",
154 | " \n",
155 | " fuid | \n",
156 | " float64 | \n",
157 | " 0 | \n",
158 | " 0 | \n",
159 | " 6 | \n",
160 | "
\n",
161 | " \n",
162 | "
\n",
163 | "
"
164 | ],
165 | "text/plain": [
166 | " Data Type Count Unique Values Missing Values\n",
167 | "ts float64 6 4 0\n",
168 | "uid object 6 2 0\n",
169 | "id.orig_h object 6 2 0\n",
170 | "id.orig_p int64 6 2 0\n",
171 | "id.resp_h object 6 2 0\n",
172 | "id.resp_p int64 6 2 0\n",
173 | "nick object 4 2 2\n",
174 | "user object 2 2 4\n",
175 | "command object 6 3 0\n",
176 | "value object 6 6 0\n",
177 | "addl object 4 4 2\n",
178 | "dcc_file_name float64 0 0 6\n",
179 | "dcc_file_size float64 0 0 6\n",
180 | "dcc_mime_type float64 0 0 6\n",
181 | "fuid float64 0 0 6"
182 | ]
183 | },
184 | "execution_count": 2,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "#DataFrame with columns\n",
191 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
192 | "\n",
193 | "#DataFrame with data types\n",
194 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
195 | "\n",
196 | "#DataFrame with Count\n",
197 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
198 | "\n",
199 | "#DataFrame with unique values\n",
200 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
201 | "for v in list(df.columns.values):\n",
202 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
203 | "\n",
204 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
205 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
206 | "print('Data Quality Report')\n",
207 | "data_quality_report"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": []
216 | }
217 | ],
218 | "metadata": {
219 | "kernelspec": {
220 | "display_name": "Python 3",
221 | "language": "python",
222 | "name": "python3"
223 | },
224 | "language_info": {
225 | "codemirror_mode": {
226 | "name": "ipython",
227 | "version": 3
228 | },
229 | "file_extension": ".py",
230 | "mimetype": "text/x-python",
231 | "name": "python",
232 | "nbconvert_exporter": "python",
233 | "pygments_lexer": "ipython3",
234 | "version": "3.6.5"
235 | }
236 | },
237 | "nbformat": 4,
238 | "nbformat_minor": 2
239 | }
240 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/loaded_scripts analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"loaded_scripts.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " name | \n",
58 | " object | \n",
59 | " 423000 | \n",
60 | " 261 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | "
\n",
65 | "
"
66 | ],
67 | "text/plain": [
68 | " Data Type Count Unique Values Missing Values\n",
69 | "name object 423000 261 0"
70 | ]
71 | },
72 | "execution_count": 2,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "#DataFrame with columns\n",
79 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
80 | "\n",
81 | "#DataFrame with data types\n",
82 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
83 | "\n",
84 | "#DataFrame with Count\n",
85 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
86 | "\n",
87 | "#DataFrame with unique values\n",
88 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
89 | "for v in list(df.columns.values):\n",
90 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
91 | "\n",
92 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
93 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
94 | "print('Data Quality Report')\n",
95 | "data_quality_report"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python 3",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.6.5"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 2
127 | }
128 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/packet_filter analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"packet_filter.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 1692 | \n",
60 | " 1692 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " node | \n",
65 | " object | \n",
66 | " 1692 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " filter | \n",
72 | " object | \n",
73 | " 1692 | \n",
74 | " 1 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " init | \n",
79 | " object | \n",
80 | " 1692 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " success | \n",
86 | " object | \n",
87 | " 1692 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | "
\n",
93 | "
"
94 | ],
95 | "text/plain": [
96 | " Data Type Count Unique Values Missing Values\n",
97 | "ts float64 1692 1692 0\n",
98 | "node object 1692 1 0\n",
99 | "filter object 1692 1 0\n",
100 | "init object 1692 1 0\n",
101 | "success object 1692 1 0"
102 | ]
103 | },
104 | "execution_count": 2,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "#DataFrame with columns\n",
111 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
112 | "\n",
113 | "#DataFrame with data types\n",
114 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
115 | "\n",
116 | "#DataFrame with Count\n",
117 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
118 | "\n",
119 | "#DataFrame with unique values\n",
120 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
121 | "for v in list(df.columns.values):\n",
122 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
123 | "\n",
124 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
125 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
126 | "print('Data Quality Report')\n",
127 | "data_quality_report"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python 3",
141 | "language": "python",
142 | "name": "python3"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.6.5"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 2/weird analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"weird.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 9583 | \n",
60 | " 9521 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 7891 | \n",
67 | " 7690 | \n",
68 | " 1692 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 7891 | \n",
74 | " 129 | \n",
75 | " 1692 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " float64 | \n",
80 | " 7891 | \n",
81 | " 377 | \n",
82 | " 1692 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 7891 | \n",
88 | " 624 | \n",
89 | " 1692 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " float64 | \n",
94 | " 7891 | \n",
95 | " 13 | \n",
96 | " 1692 | \n",
97 | "
\n",
98 | " \n",
99 | " name | \n",
100 | " object | \n",
101 | " 9583 | \n",
102 | " 23 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " addl | \n",
107 | " object | \n",
108 | " 7 | \n",
109 | " 1 | \n",
110 | " 9576 | \n",
111 | "
\n",
112 | " \n",
113 | " notice | \n",
114 | " object | \n",
115 | " 9583 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " peer | \n",
121 | " object | \n",
122 | " 9583 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 9583 9521 0\n",
133 | "uid object 7891 7690 1692\n",
134 | "id.orig_h object 7891 129 1692\n",
135 | "id.orig_p float64 7891 377 1692\n",
136 | "id.resp_h object 7891 624 1692\n",
137 | "id.resp_p float64 7891 13 1692\n",
138 | "name object 9583 23 0\n",
139 | "addl object 7 1 9576\n",
140 | "notice object 9583 1 0\n",
141 | "peer object 9583 1 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/app_stats analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"app_stats.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 878 | \n",
60 | " 727 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " ts_delta | \n",
65 | " float64 | \n",
66 | " 878 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " app | \n",
72 | " object | \n",
73 | " 878 | \n",
74 | " 3 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " uniq_hosts | \n",
79 | " int64 | \n",
80 | " 878 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " hits | \n",
86 | " int64 | \n",
87 | " 878 | \n",
88 | " 46 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " bytes | \n",
93 | " int64 | \n",
94 | " 878 | \n",
95 | " 715 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | "
\n",
100 | "
"
101 | ],
102 | "text/plain": [
103 | " Data Type Count Unique Values Missing Values\n",
104 | "ts float64 878 727 0\n",
105 | "ts_delta float64 878 1 0\n",
106 | "app object 878 3 0\n",
107 | "uniq_hosts int64 878 1 0\n",
108 | "hits int64 878 46 0\n",
109 | "bytes int64 878 715 0"
110 | ]
111 | },
112 | "execution_count": 2,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "#DataFrame with columns\n",
119 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
120 | "\n",
121 | "#DataFrame with data types\n",
122 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
123 | "\n",
124 | "#DataFrame with Count\n",
125 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
126 | "\n",
127 | "#DataFrame with unique values\n",
128 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
129 | "for v in list(df.columns.values):\n",
130 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
131 | "\n",
132 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
133 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
134 | "print('Data Quality Report')\n",
135 | "data_quality_report"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": []
144 | }
145 | ],
146 | "metadata": {
147 | "kernelspec": {
148 | "display_name": "Python 3",
149 | "language": "python",
150 | "name": "python3"
151 | },
152 | "language_info": {
153 | "codemirror_mode": {
154 | "name": "ipython",
155 | "version": 3
156 | },
157 | "file_extension": ".py",
158 | "mimetype": "text/x-python",
159 | "name": "python",
160 | "nbconvert_exporter": "python",
161 | "pygments_lexer": "ipython3",
162 | "version": "3.6.5"
163 | }
164 | },
165 | "nbformat": 4,
166 | "nbformat_minor": 2
167 | }
168 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/dhcp analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dhcp.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 4780 | \n",
60 | " 4780 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 4780 | \n",
67 | " 2393 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 4780 | \n",
74 | " 128 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 4780 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 4780 | \n",
88 | " 128 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 4780 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " mac | \n",
100 | " object | \n",
101 | " 4780 | \n",
102 | " 128 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " assigned_ip | \n",
107 | " object | \n",
108 | " 4780 | \n",
109 | " 128 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " lease_time | \n",
114 | " float64 | \n",
115 | " 4780 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " trans_id | \n",
121 | " int64 | \n",
122 | " 4780 | \n",
123 | " 4780 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 4780 4780 0\n",
133 | "uid object 4780 2393 0\n",
134 | "id.orig_h object 4780 128 0\n",
135 | "id.orig_p int64 4780 1 0\n",
136 | "id.resp_h object 4780 128 0\n",
137 | "id.resp_p int64 4780 1 0\n",
138 | "mac object 4780 128 0\n",
139 | "assigned_ip object 4780 128 0\n",
140 | "lease_time float64 4780 1 0\n",
141 | "trans_id int64 4780 4780 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/dpd analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dpd.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 75 | \n",
60 | " 75 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 75 | \n",
67 | " 75 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 75 | \n",
74 | " 24 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 75 | \n",
81 | " 61 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 75 | \n",
88 | " 29 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 75 | \n",
95 | " 4 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " proto | \n",
100 | " object | \n",
101 | " 75 | \n",
102 | " 1 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " analyzer | \n",
107 | " object | \n",
108 | " 75 | \n",
109 | " 1 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " failure_reason | \n",
114 | " object | \n",
115 | " 75 | \n",
116 | " 2 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | "
\n",
121 | "
"
122 | ],
123 | "text/plain": [
124 | " Data Type Count Unique Values Missing Values\n",
125 | "ts float64 75 75 0\n",
126 | "uid object 75 75 0\n",
127 | "id.orig_h object 75 24 0\n",
128 | "id.orig_p int64 75 61 0\n",
129 | "id.resp_h object 75 29 0\n",
130 | "id.resp_p int64 75 4 0\n",
131 | "proto object 75 1 0\n",
132 | "analyzer object 75 1 0\n",
133 | "failure_reason object 75 2 0"
134 | ]
135 | },
136 | "execution_count": 2,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "#DataFrame with columns\n",
143 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
144 | "\n",
145 | "#DataFrame with data types\n",
146 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
147 | "\n",
148 | "#DataFrame with Count\n",
149 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
150 | "\n",
151 | "#DataFrame with unique values\n",
152 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
153 | "for v in list(df.columns.values):\n",
154 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
155 | "\n",
156 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
157 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
158 | "print('Data Quality Report')\n",
159 | "data_quality_report"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": []
168 | }
169 | ],
170 | "metadata": {
171 | "kernelspec": {
172 | "display_name": "Python 3",
173 | "language": "python",
174 | "name": "python3"
175 | },
176 | "language_info": {
177 | "codemirror_mode": {
178 | "name": "ipython",
179 | "version": 3
180 | },
181 | "file_extension": ".py",
182 | "mimetype": "text/x-python",
183 | "name": "python",
184 | "nbconvert_exporter": "python",
185 | "pygments_lexer": "ipython3",
186 | "version": "3.6.5"
187 | }
188 | },
189 | "nbformat": 4,
190 | "nbformat_minor": 2
191 | }
192 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/irc analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"irc.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 21 | \n",
60 | " 15 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 21 | \n",
67 | " 6 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 21 | \n",
74 | " 6 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 21 | \n",
81 | " 6 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 21 | \n",
88 | " 5 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 21 | \n",
95 | " 5 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " nick | \n",
100 | " object | \n",
101 | " 15 | \n",
102 | " 6 | \n",
103 | " 6 | \n",
104 | "
\n",
105 | " \n",
106 | " user | \n",
107 | " object | \n",
108 | " 9 | \n",
109 | " 6 | \n",
110 | " 12 | \n",
111 | "
\n",
112 | " \n",
113 | " command | \n",
114 | " object | \n",
115 | " 21 | \n",
116 | " 3 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " value | \n",
121 | " object | \n",
122 | " 21 | \n",
123 | " 18 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | " addl | \n",
128 | " object | \n",
129 | " 15 | \n",
130 | " 12 | \n",
131 | " 6 | \n",
132 | "
\n",
133 | " \n",
134 | " dcc_file_name | \n",
135 | " float64 | \n",
136 | " 0 | \n",
137 | " 0 | \n",
138 | " 21 | \n",
139 | "
\n",
140 | " \n",
141 | " dcc_file_size | \n",
142 | " float64 | \n",
143 | " 0 | \n",
144 | " 0 | \n",
145 | " 21 | \n",
146 | "
\n",
147 | " \n",
148 | " dcc_mime_type | \n",
149 | " float64 | \n",
150 | " 0 | \n",
151 | " 0 | \n",
152 | " 21 | \n",
153 | "
\n",
154 | " \n",
155 | " fuid | \n",
156 | " float64 | \n",
157 | " 0 | \n",
158 | " 0 | \n",
159 | " 21 | \n",
160 | "
\n",
161 | " \n",
162 | "
\n",
163 | "
"
164 | ],
165 | "text/plain": [
166 | " Data Type Count Unique Values Missing Values\n",
167 | "ts float64 21 15 0\n",
168 | "uid object 21 6 0\n",
169 | "id.orig_h object 21 6 0\n",
170 | "id.orig_p int64 21 6 0\n",
171 | "id.resp_h object 21 5 0\n",
172 | "id.resp_p int64 21 5 0\n",
173 | "nick object 15 6 6\n",
174 | "user object 9 6 12\n",
175 | "command object 21 3 0\n",
176 | "value object 21 18 0\n",
177 | "addl object 15 12 6\n",
178 | "dcc_file_name float64 0 0 21\n",
179 | "dcc_file_size float64 0 0 21\n",
180 | "dcc_mime_type float64 0 0 21\n",
181 | "fuid float64 0 0 21"
182 | ]
183 | },
184 | "execution_count": 2,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "#DataFrame with columns\n",
191 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
192 | "\n",
193 | "#DataFrame with data types\n",
194 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
195 | "\n",
196 | "#DataFrame with Count\n",
197 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
198 | "\n",
199 | "#DataFrame with unique values\n",
200 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
201 | "for v in list(df.columns.values):\n",
202 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
203 | "\n",
204 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
205 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
206 | "print('Data Quality Report')\n",
207 | "data_quality_report"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": []
216 | }
217 | ],
218 | "metadata": {
219 | "kernelspec": {
220 | "display_name": "Python 3",
221 | "language": "python",
222 | "name": "python3"
223 | },
224 | "language_info": {
225 | "codemirror_mode": {
226 | "name": "ipython",
227 | "version": 3
228 | },
229 | "file_extension": ".py",
230 | "mimetype": "text/x-python",
231 | "name": "python",
232 | "nbconvert_exporter": "python",
233 | "pygments_lexer": "ipython3",
234 | "version": "3.6.5"
235 | }
236 | },
237 | "nbformat": 4,
238 | "nbformat_minor": 2
239 | }
240 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/loaded_scripts analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"loaded_scripts.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " name | \n",
58 | " object | \n",
59 | " 595500 | \n",
60 | " 265 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | "
\n",
65 | "
"
66 | ],
67 | "text/plain": [
68 | " Data Type Count Unique Values Missing Values\n",
69 | "name object 595500 265 0"
70 | ]
71 | },
72 | "execution_count": 2,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "#DataFrame with columns\n",
79 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
80 | "\n",
81 | "#DataFrame with data types\n",
82 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
83 | "\n",
84 | "#DataFrame with Count\n",
85 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
86 | "\n",
87 | "#DataFrame with unique values\n",
88 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
89 | "for v in list(df.columns.values):\n",
90 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
91 | "\n",
92 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
93 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
94 | "print('Data Quality Report')\n",
95 | "data_quality_report"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python 3",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.6.5"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 2
127 | }
128 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/packet_filter analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"packet_filter.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 2382 | \n",
60 | " 2382 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " node | \n",
65 | " object | \n",
66 | " 2382 | \n",
67 | " 1 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " filter | \n",
72 | " object | \n",
73 | " 2382 | \n",
74 | " 1 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " init | \n",
79 | " object | \n",
80 | " 2382 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " success | \n",
86 | " object | \n",
87 | " 2382 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | "
\n",
93 | "
"
94 | ],
95 | "text/plain": [
96 | " Data Type Count Unique Values Missing Values\n",
97 | "ts float64 2382 2382 0\n",
98 | "node object 2382 1 0\n",
99 | "filter object 2382 1 0\n",
100 | "init object 2382 1 0\n",
101 | "success object 2382 1 0"
102 | ]
103 | },
104 | "execution_count": 2,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "#DataFrame with columns\n",
111 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
112 | "\n",
113 | "#DataFrame with data types\n",
114 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
115 | "\n",
116 | "#DataFrame with Count\n",
117 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
118 | "\n",
119 | "#DataFrame with unique values\n",
120 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
121 | "for v in list(df.columns.values):\n",
122 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
123 | "\n",
124 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
125 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
126 | "print('Data Quality Report')\n",
127 | "data_quality_report"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python 3",
141 | "language": "python",
142 | "name": "python3"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.6.5"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------
/Data_analysis/Bro Logs from Threatglass/Part 3/weird analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"weird.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 12873 | \n",
60 | " 12795 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 10491 | \n",
67 | " 10193 | \n",
68 | " 2382 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 10491 | \n",
74 | " 133 | \n",
75 | " 2382 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " float64 | \n",
80 | " 10491 | \n",
81 | " 656 | \n",
82 | " 2382 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 10491 | \n",
88 | " 1019 | \n",
89 | " 2382 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " float64 | \n",
94 | " 10491 | \n",
95 | " 18 | \n",
96 | " 2382 | \n",
97 | "
\n",
98 | " \n",
99 | " name | \n",
100 | " object | \n",
101 | " 12873 | \n",
102 | " 30 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " addl | \n",
107 | " object | \n",
108 | " 3 | \n",
109 | " 2 | \n",
110 | " 12870 | \n",
111 | "
\n",
112 | " \n",
113 | " notice | \n",
114 | " object | \n",
115 | " 12873 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " peer | \n",
121 | " object | \n",
122 | " 12873 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 12873 12795 0\n",
133 | "uid object 10491 10193 2382\n",
134 | "id.orig_h object 10491 133 2382\n",
135 | "id.orig_p float64 10491 656 2382\n",
136 | "id.resp_h object 10491 1019 2382\n",
137 | "id.resp_p float64 10491 18 2382\n",
138 | "name object 12873 30 0\n",
139 | "addl object 3 2 12870\n",
140 | "notice object 12873 1 0\n",
141 | "peer object 12873 1 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Network analysis/dhcp analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"dhcp.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 1502 | \n",
60 | " 1497 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 1502 | \n",
67 | " 1418 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 1502 | \n",
74 | " 100 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 1502 | \n",
81 | " 1 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 1502 | \n",
88 | " 3 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 1502 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " mac | \n",
100 | " object | \n",
101 | " 1502 | \n",
102 | " 87 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " assigned_ip | \n",
107 | " object | \n",
108 | " 1502 | \n",
109 | " 99 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | " lease_time | \n",
114 | " float64 | \n",
115 | " 1502 | \n",
116 | " 2 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " trans_id | \n",
121 | " int64 | \n",
122 | " 1502 | \n",
123 | " 1476 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 1502 1497 0\n",
133 | "uid object 1502 1418 0\n",
134 | "id.orig_h object 1502 100 0\n",
135 | "id.orig_p int64 1502 1 0\n",
136 | "id.resp_h object 1502 3 0\n",
137 | "id.resp_p int64 1502 1 0\n",
138 | "mac object 1502 87 0\n",
139 | "assigned_ip object 1502 99 0\n",
140 | "lease_time float64 1502 2 0\n",
141 | "trans_id int64 1502 1476 0"
142 | ]
143 | },
144 | "execution_count": 2,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | }
170 | ],
171 | "metadata": {
172 | "kernelspec": {
173 | "display_name": "Python 3",
174 | "language": "python",
175 | "name": "python3"
176 | },
177 | "language_info": {
178 | "codemirror_mode": {
179 | "name": "ipython",
180 | "version": 3
181 | },
182 | "file_extension": ".py",
183 | "mimetype": "text/x-python",
184 | "name": "python",
185 | "nbconvert_exporter": "python",
186 | "pygments_lexer": "ipython3",
187 | "version": "3.6.5"
188 | }
189 | },
190 | "nbformat": 4,
191 | "nbformat_minor": 2
192 | }
193 |
--------------------------------------------------------------------------------
/Data_analysis/Network analysis/ftp analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"ftp.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 5796 | \n",
60 | " 2390 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 5796 | \n",
67 | " 137 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 5796 | \n",
74 | " 15 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 5796 | \n",
81 | " 95 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 5796 | \n",
88 | " 21 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 5796 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " user | \n",
100 | " object | \n",
101 | " 5796 | \n",
102 | " 4 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " password | \n",
107 | " object | \n",
108 | " 5745 | \n",
109 | " 12 | \n",
110 | " 51 | \n",
111 | "
\n",
112 | " \n",
113 | " command | \n",
114 | " object | \n",
115 | " 5796 | \n",
116 | " 6 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " arg | \n",
121 | " object | \n",
122 | " 2966 | \n",
123 | " 1545 | \n",
124 | " 2830 | \n",
125 | "
\n",
126 | " \n",
127 | " mime_type | \n",
128 | " object | \n",
129 | " 95 | \n",
130 | " 8 | \n",
131 | " 5701 | \n",
132 | "
\n",
133 | " \n",
134 | " file_size | \n",
135 | " float64 | \n",
136 | " 105 | \n",
137 | " 78 | \n",
138 | " 5691 | \n",
139 | "
\n",
140 | " \n",
141 | " reply_code | \n",
142 | " float64 | \n",
143 | " 5756 | \n",
144 | " 9 | \n",
145 | " 40 | \n",
146 | "
\n",
147 | " \n",
148 | " reply_msg | \n",
149 | " object | \n",
150 | " 5756 | \n",
151 | " 4184 | \n",
152 | " 40 | \n",
153 | "
\n",
154 | " \n",
155 | " passive | \n",
156 | " object | \n",
157 | " 2897 | \n",
158 | " 2 | \n",
159 | " 2899 | \n",
160 | "
\n",
161 | " \n",
162 | " orig_h | \n",
163 | " object | \n",
164 | " 2897 | \n",
165 | " 19 | \n",
166 | " 2899 | \n",
167 | "
\n",
168 | " \n",
169 | " resp_h | \n",
170 | " object | \n",
171 | " 2897 | \n",
172 | " 20 | \n",
173 | " 2899 | \n",
174 | "
\n",
175 | " \n",
176 | " resp_p | \n",
177 | " float64 | \n",
178 | " 2897 | \n",
179 | " 2769 | \n",
180 | " 2899 | \n",
181 | "
\n",
182 | " \n",
183 | " fuid | \n",
184 | " object | \n",
185 | " 5486 | \n",
186 | " 466 | \n",
187 | " 310 | \n",
188 | "
\n",
189 | " \n",
190 | "
\n",
191 | "
"
192 | ],
193 | "text/plain": [
194 | " Data Type Count Unique Values Missing Values\n",
195 | "ts float64 5796 2390 0\n",
196 | "uid object 5796 137 0\n",
197 | "id.orig_h object 5796 15 0\n",
198 | "id.orig_p int64 5796 95 0\n",
199 | "id.resp_h object 5796 21 0\n",
200 | "id.resp_p int64 5796 1 0\n",
201 | "user object 5796 4 0\n",
202 | "password object 5745 12 51\n",
203 | "command object 5796 6 0\n",
204 | "arg object 2966 1545 2830\n",
205 | "mime_type object 95 8 5701\n",
206 | "file_size float64 105 78 5691\n",
207 | "reply_code float64 5756 9 40\n",
208 | "reply_msg object 5756 4184 40\n",
209 | "passive object 2897 2 2899\n",
210 | "orig_h object 2897 19 2899\n",
211 | "resp_h object 2897 20 2899\n",
212 | "resp_p float64 2897 2769 2899\n",
213 | "fuid object 5486 466 310"
214 | ]
215 | },
216 | "execution_count": 2,
217 | "metadata": {},
218 | "output_type": "execute_result"
219 | }
220 | ],
221 | "source": [
222 | "#DataFrame with columns\n",
223 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
224 | "\n",
225 | "#DataFrame with data types\n",
226 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
227 | "\n",
228 | "#DataFrame with Count\n",
229 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
230 | "\n",
231 | "#DataFrame with unique values\n",
232 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
233 | "for v in list(df.columns.values):\n",
234 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
235 | "\n",
236 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
237 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
238 | "print('Data Quality Report')\n",
239 | "data_quality_report"
240 | ]
241 | }
242 | ],
243 | "metadata": {
244 | "kernelspec": {
245 | "display_name": "Python 3",
246 | "language": "python",
247 | "name": "python3"
248 | },
249 | "language_info": {
250 | "codemirror_mode": {
251 | "name": "ipython",
252 | "version": 3
253 | },
254 | "file_extension": ".py",
255 | "mimetype": "text/x-python",
256 | "name": "python",
257 | "nbconvert_exporter": "python",
258 | "pygments_lexer": "ipython3",
259 | "version": "3.6.5"
260 | }
261 | },
262 | "nbformat": 4,
263 | "nbformat_minor": 2
264 | }
265 |
--------------------------------------------------------------------------------
/Data_analysis/Network analysis/ssh analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"weird.csv\", low_memory=False)"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 8,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 65983 | \n",
60 | " 44044 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 65526 | \n",
67 | " 51651 | \n",
68 | " 457 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 65526 | \n",
74 | " 221 | \n",
75 | " 457 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " float64 | \n",
80 | " 65526 | \n",
81 | " 25772 | \n",
82 | " 457 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 65526 | \n",
88 | " 279 | \n",
89 | " 457 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " float64 | \n",
94 | " 65526 | \n",
95 | " 183 | \n",
96 | " 457 | \n",
97 | "
\n",
98 | " \n",
99 | " name | \n",
100 | " object | \n",
101 | " 65983 | \n",
102 | " 50 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " addl | \n",
107 | " object | \n",
108 | " 865 | \n",
109 | " 125 | \n",
110 | " 65118 | \n",
111 | "
\n",
112 | " \n",
113 | " notice | \n",
114 | " object | \n",
115 | " 65983 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " peer | \n",
121 | " object | \n",
122 | " 65983 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 65983 44044 0\n",
133 | "uid object 65526 51651 457\n",
134 | "id.orig_h object 65526 221 457\n",
135 | "id.orig_p float64 65526 25772 457\n",
136 | "id.resp_h object 65526 279 457\n",
137 | "id.resp_p float64 65526 183 457\n",
138 | "name object 65983 50 0\n",
139 | "addl object 865 125 65118\n",
140 | "notice object 65983 1 0\n",
141 | "peer object 65983 1 0"
142 | ]
143 | },
144 | "execution_count": 8,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": []
176 | }
177 | ],
178 | "metadata": {
179 | "kernelspec": {
180 | "display_name": "Python 3",
181 | "language": "python",
182 | "name": "python3"
183 | },
184 | "language_info": {
185 | "codemirror_mode": {
186 | "name": "ipython",
187 | "version": 3
188 | },
189 | "file_extension": ".py",
190 | "mimetype": "text/x-python",
191 | "name": "python",
192 | "nbconvert_exporter": "python",
193 | "pygments_lexer": "ipython3",
194 | "version": "3.6.5"
195 | }
196 | },
197 | "nbformat": 4,
198 | "nbformat_minor": 2
199 | }
200 |
--------------------------------------------------------------------------------
/Data_analysis/Network analysis/tunnel analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"tunnel.csv\")"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 280 | \n",
60 | " 280 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 280 | \n",
67 | " 140 | \n",
68 | " 0 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 280 | \n",
74 | " 3 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " int64 | \n",
80 | " 280 | \n",
81 | " 139 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 280 | \n",
88 | " 81 | \n",
89 | " 0 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " int64 | \n",
94 | " 280 | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | " tunnel_type | \n",
100 | " object | \n",
101 | " 280 | \n",
102 | " 1 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " action | \n",
107 | " object | \n",
108 | " 280 | \n",
109 | " 2 | \n",
110 | " 0 | \n",
111 | "
\n",
112 | " \n",
113 | "
\n",
114 | "
"
115 | ],
116 | "text/plain": [
117 | " Data Type Count Unique Values Missing Values\n",
118 | "ts float64 280 280 0\n",
119 | "uid object 280 140 0\n",
120 | "id.orig_h object 280 3 0\n",
121 | "id.orig_p int64 280 139 0\n",
122 | "id.resp_h object 280 81 0\n",
123 | "id.resp_p int64 280 1 0\n",
124 | "tunnel_type object 280 1 0\n",
125 | "action object 280 2 0"
126 | ]
127 | },
128 | "execution_count": 2,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "#DataFrame with columns\n",
135 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
136 | "\n",
137 | "#DataFrame with data types\n",
138 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
139 | "\n",
140 | "#DataFrame with Count\n",
141 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
142 | "\n",
143 | "#DataFrame with unique values\n",
144 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
145 | "for v in list(df.columns.values):\n",
146 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
147 | "\n",
148 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
149 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
150 | "print('Data Quality Report')\n",
151 | "data_quality_report"
152 | ]
153 | }
154 | ],
155 | "metadata": {
156 | "kernelspec": {
157 | "display_name": "Python 3",
158 | "language": "python",
159 | "name": "python3"
160 | },
161 | "language_info": {
162 | "codemirror_mode": {
163 | "name": "ipython",
164 | "version": 3
165 | },
166 | "file_extension": ".py",
167 | "mimetype": "text/x-python",
168 | "name": "python",
169 | "nbconvert_exporter": "python",
170 | "pygments_lexer": "ipython3",
171 | "version": "3.6.5"
172 | }
173 | },
174 | "nbformat": 4,
175 | "nbformat_minor": 2
176 | }
177 |
--------------------------------------------------------------------------------
/Data_analysis/Network analysis/weird analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from mpl_toolkits.mplot3d import Axes3D\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "df = pd.read_csv(\"weird.csv\", low_memory=False)"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 3,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "Data Quality Report\n"
26 | ]
27 | },
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " Data Type | \n",
50 | " Count | \n",
51 | " Unique Values | \n",
52 | " Missing Values | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " ts | \n",
58 | " float64 | \n",
59 | " 65983 | \n",
60 | " 44044 | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " uid | \n",
65 | " object | \n",
66 | " 65526 | \n",
67 | " 51651 | \n",
68 | " 457 | \n",
69 | "
\n",
70 | " \n",
71 | " id.orig_h | \n",
72 | " object | \n",
73 | " 65526 | \n",
74 | " 221 | \n",
75 | " 457 | \n",
76 | "
\n",
77 | " \n",
78 | " id.orig_p | \n",
79 | " float64 | \n",
80 | " 65526 | \n",
81 | " 25772 | \n",
82 | " 457 | \n",
83 | "
\n",
84 | " \n",
85 | " id.resp_h | \n",
86 | " object | \n",
87 | " 65526 | \n",
88 | " 279 | \n",
89 | " 457 | \n",
90 | "
\n",
91 | " \n",
92 | " id.resp_p | \n",
93 | " float64 | \n",
94 | " 65526 | \n",
95 | " 183 | \n",
96 | " 457 | \n",
97 | "
\n",
98 | " \n",
99 | " name | \n",
100 | " object | \n",
101 | " 65983 | \n",
102 | " 50 | \n",
103 | " 0 | \n",
104 | "
\n",
105 | " \n",
106 | " addl | \n",
107 | " object | \n",
108 | " 865 | \n",
109 | " 125 | \n",
110 | " 65118 | \n",
111 | "
\n",
112 | " \n",
113 | " notice | \n",
114 | " object | \n",
115 | " 65983 | \n",
116 | " 1 | \n",
117 | " 0 | \n",
118 | "
\n",
119 | " \n",
120 | " peer | \n",
121 | " object | \n",
122 | " 65983 | \n",
123 | " 1 | \n",
124 | " 0 | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " Data Type Count Unique Values Missing Values\n",
132 | "ts float64 65983 44044 0\n",
133 | "uid object 65526 51651 457\n",
134 | "id.orig_h object 65526 221 457\n",
135 | "id.orig_p float64 65526 25772 457\n",
136 | "id.resp_h object 65526 279 457\n",
137 | "id.resp_p float64 65526 183 457\n",
138 | "name object 65983 50 0\n",
139 | "addl object 865 125 65118\n",
140 | "notice object 65983 1 0\n",
141 | "peer object 65983 1 0"
142 | ]
143 | },
144 | "execution_count": 3,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#DataFrame with columns\n",
151 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
152 | "\n",
153 | "#DataFrame with data types\n",
154 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
155 | "\n",
156 | "#DataFrame with Count\n",
157 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
158 | "\n",
159 | "#DataFrame with unique values\n",
160 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
161 | "for v in list(df.columns.values):\n",
162 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
163 | "\n",
164 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
165 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
166 | "print('Data Quality Report')\n",
167 | "data_quality_report"
168 | ]
169 | }
170 | ],
171 | "metadata": {
172 | "kernelspec": {
173 | "display_name": "Python 3",
174 | "language": "python",
175 | "name": "python3"
176 | },
177 | "language_info": {
178 | "codemirror_mode": {
179 | "name": "ipython",
180 | "version": 3
181 | },
182 | "file_extension": ".py",
183 | "mimetype": "text/x-python",
184 | "name": "python",
185 | "nbconvert_exporter": "python",
186 | "pygments_lexer": "ipython3",
187 | "version": "3.6.5"
188 | }
189 | },
190 | "nbformat": 4,
191 | "nbformat_minor": 2
192 | }
193 |
--------------------------------------------------------------------------------
/Data_analysis/System analysis/auth analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "\n",
11 | "df = pd.read_csv(\"auth.csv\")"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 8,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stdout",
21 | "output_type": "stream",
22 | "text": [
23 | "Data Quality Report\n"
24 | ]
25 | },
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " Data Type | \n",
48 | " Count | \n",
49 | " Unique Values | \n",
50 | " Missing Values | \n",
51 | "
\n",
52 | " \n",
53 | " \n",
54 | " \n",
55 | " ts | \n",
56 | " object | \n",
57 | " 86839 | \n",
58 | " 23140 | \n",
59 | " 0 | \n",
60 | "
\n",
61 | " \n",
62 | " ip | \n",
63 | " object | \n",
64 | " 86839 | \n",
65 | " 27895 | \n",
66 | " 0 | \n",
67 | "
\n",
68 | " \n",
69 | " daemon | \n",
70 | " object | \n",
71 | " 86839 | \n",
72 | " 18683 | \n",
73 | " 0 | \n",
74 | "
\n",
75 | " \n",
76 | " result | \n",
77 | " object | \n",
78 | " 86839 | \n",
79 | " 47907 | \n",
80 | " 0 | \n",
81 | "
\n",
82 | " \n",
83 | "
\n",
84 | "
"
85 | ],
86 | "text/plain": [
87 | " Data Type Count Unique Values Missing Values\n",
88 | "ts object 86839 23140 0\n",
89 | "ip object 86839 27895 0\n",
90 | "daemon object 86839 18683 0\n",
91 | "result object 86839 47907 0"
92 | ]
93 | },
94 | "execution_count": 8,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "#DataFrame with columns\n",
101 | "columns = pd.DataFrame(list(df.columns.values[1:]))\n",
102 | "\n",
103 | "#DataFrame with data types\n",
104 | "data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])\n",
105 | "\n",
106 | "#DataFrame with Count\n",
107 | "data_count = pd.DataFrame(df.count(), columns=['Count'])\n",
108 | "\n",
109 | "#DataFrame with unique values\n",
110 | "unique_value_counts = pd.DataFrame(columns=['Unique Values'])\n",
111 | "for v in list(df.columns.values):\n",
112 | " unique_value_counts.loc[v] = [df[v].nunique()]\n",
113 | "\n",
114 | "missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])\n",
115 | "data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)\n",
116 | "print('Data Quality Report')\n",
117 | "data_quality_report"
118 | ]
119 | }
120 | ],
121 | "metadata": {
122 | "kernelspec": {
123 | "display_name": "Python 3",
124 | "language": "python",
125 | "name": "python3"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 3
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython3",
137 | "version": "3.7.0"
138 | }
139 | },
140 | "nbformat": 4,
141 | "nbformat_minor": 2
142 | }
143 |
--------------------------------------------------------------------------------
/Machine_learning_practice/machine_learning.py:
--------------------------------------------------------------------------------
1 | # Import packages needed
2 | import os
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn import svm
6 | from sklearn import preprocessing
7 | import time
8 |
9 | # This function builds the training and testing datasets
10 | def Build_Data_Set():
11 | df = pd.read_csv('../Data/out.csv', index_col=0) # 100000 entries
12 | df_train = df[:50000] # First 50,000 rows as the training dataset
13 | df_test = df[10000:] # Rows 10,000 onward as the testing dataset (overlaps the training rows; Analysis() scores only the first 10,000 predictions)
14 |
15 | # Clean up training dataset and scale it
16 | X_train = np.array(df_train.drop(['classification','usage_counter', 'normal_prio', 'policy', 'vm_pgoff', 'task_size', 'cached_hole_size', 'hiwater_rss', 'nr_ptes', 'lock', 'cgtime', 'signal_nvcsw'], 1))
17 | X_train = preprocessing.scale(X_train)
18 | # Training label
19 | y_train = np.array(df_train['classification'].replace("malware",0).replace("benign",1))
20 |
21 | # Clean up testing dataset and scale it
22 | X_test = np.array(df_test.drop(['classification','usage_counter', 'normal_prio', 'policy', 'vm_pgoff', 'task_size', 'cached_hole_size', 'hiwater_rss', 'nr_ptes', 'lock', 'cgtime', 'signal_nvcsw'], 1))
23 | X_test = preprocessing.scale(X_test)
24 | #Testing label
25 | y_test = np.array(df_test['classification'].replace("malware",0).replace("benign",1))
26 |
27 | return X_train, X_test, y_train, y_test # Return arrays
28 |
29 | # This function builds a machine learning model using the scikit-learn SVM algorithm and computes the prediction accuracy
30 | def Analysis():
31 | test_size = 10000 # Number of test predictions to score
32 | X_train, X_test, y_train, y_test = Build_Data_Set() # Building training and testing datasets
33 |
34 | clf = svm.SVC(kernel="linear", C=0.01) # Create a linear-kernel SVM classifier with scikit-learn
35 | clf.fit(X_train, y_train) # Train the model on the training dataset and labels
36 | result = clf.predict(X_test) # Predict labels for the testing dataset
37 |
38 | # Compute the accuracy and print it out
39 | correct_count = 0
40 | for i in range(0,test_size):
41 | if result[i] == y_test[i]:
42 | correct_count += 1
43 |
44 | print("Accuracy:", (correct_count/test_size)*100)
45 | return
46 |
47 |
48 | start = time.time()
49 | Analysis() # run the program
50 | end = time.time()
51 |
52 | elapsed = end - start
53 |
54 | print("Time:",elapsed)
55 |
56 |
57 | # import sha3
58 | # import hashlib
59 | # # Experiment (kept commented out): hash the 'hash' column of new_data.csv
60 | # # and write the result to out.csv
61 | # df = pd.read_csv('new_data.csv', index_col=0) # 100000 entries
62 |
63 | # # s = hashlib.sha3_512()
64 | # # s.update(b"hello")
65 | # # print(s.hexdigest())
66 |
67 | # df['hash'] = df['hash'].apply(hash)
68 | # df.to_csv('out.csv')
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MachineLearning - DataSet Quality Research
2 |
3 | This page will contain our progress in creating a report detailing the quality of different datasets.
4 |
5 | We have acquired permission from Mike Sconzo, owner of secrepo.com, to analyze his security datasets and report on the data.
6 |
7 |
8 | # Security Datasets for Machine Learning
9 | by Tien Tran, Citlalin Galvan, Vivian Nguyen, Huy Nguyen
10 |
11 | ## WHY FOCUS ON DATASETS?
12 | Machine Learning is on the rise ⇑
13 |
14 | A Machine Learning Algorithm can:
15 | Detect Suspicious Activity
16 | Stop malicious files from executing
17 |
18 | The Problem:
19 | A critical problem for Machine Learning in Cyber Security is the limited amount and quality of training data. Without a good-quality dataset, a Machine Learning algorithm cannot learn properly.
20 |
21 | ## Collecting the DataSets
22 | Downloading SecRepo’s Datasets
23 |
24 | PE Malware Dataset
25 | featureExtraction.py
26 |
27 | Network Dataset
28 | Network_LogtoCSV.py
29 |
30 | Bro Logs Dataset
31 | Brolog_LogtoCSV.py
32 |
33 | System Dataset
34 | System_LogtoCSV.py
35 | System_Squid_LogtoCSV.py
36 |
37 | ## Analysis Reports
38 | Detailing the data inside the Datasets with Jupyter Notebook
39 |
40 | Elements in Data Quality Report (a minimal pandas sketch for building the report appears at the end of this README):
41 |
42 | Data Type
43 |
44 | Count
45 |
46 | Unique Values
47 |
48 | Missing Values
49 |
50 | Minimum Values
51 |
52 | Maximum Values
53 |
54 | ## Description Reports
55 |
56 | Report Format
57 |
58 | Abstract
59 |
60 | Source
61 |
62 | Dataset Information
63 |
64 | Attribute Information
65 |
66 | Relevant Papers
67 |
68 | Associate Data Science Notebook
69 |
70 |
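71 | A minimal sketch of how the quality-report columns listed above can be assembled with pandas (the `http.csv` file name is only an example; the notebooks under `Data_analysis` build the first four columns this way, and the Minimum/Maximum values apply to numeric columns only):
72 | 
73 | ```python
74 | import pandas as pd
75 | 
76 | df = pd.read_csv("http.csv")  # example input; any of the converted CSV logs works
77 | 
78 | report = pd.DataFrame({
79 |     "Data Type": df.dtypes,
80 |     "Count": df.count(),
81 |     "Unique Values": df.nunique(),
82 |     "Missing Values": df.isnull().sum(),
83 |     "Minimum Values": df.min(numeric_only=True),
84 |     "Maximum Values": df.max(numeric_only=True),
85 | })
86 | print("Data Quality Report")
87 | print(report)
88 | ```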
--------------------------------------------------------------------------------
/Scripts/LogToCsv.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 | # Convert a log file into a CSV file so we can manipulate it with pandas
4 | dhcp_path = '/Users/citlalingalvan/Downloads/dhcp.log'
5 | with open('output.csv', 'w+', encoding='utf-8') as csvfile:
6 | w = csv.writer(csvfile, dialect='excel')
7 | with open(dhcp_path, encoding="utf8") as file:
8 | lines = file.read().split('\n')
9 | files = []
10 | for line in lines:
11 | files.append(line.split('\t'))
12 | w.writerows(files)
--------------------------------------------------------------------------------
/Scripts/LogtoCsvConverter.py:
--------------------------------------------------------------------------------
1 | # Convert a log file into a CSV file so we can manipulate it with pandas
2 |
3 | import csv
4 | import os
5 |
6 |
7 | dhcp_path = input("Enter File Path: ")
8 |
9 | print("******Please wait while it is converted******")
10 |
11 | with open('NewCsvFile.csv', 'w+') as csvfile:
12 | w = csv.writer(csvfile, dialect='excel')
13 | with open(dhcp_path) as file:
14 | lines = file.read().split('\n')
15 | file = []
16 | for line in lines:
17 | file.append(line.split('\t'))
18 | w.writerows(file)
19 |
20 | print("*********** Succesfully converted ***********")
21 |
22 | # rename file
23 | renameFile = input('Rename File *add .csv* : ')
24 | os.rename('NewCsvFile.csv', renameFile)
25 |
26 | print("Succesfully Renamed: ")
27 |
--------------------------------------------------------------------------------
/Scripts/NetworkLogToCSV.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | dic = {"dhcp.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "mac", "assigned_ip", "lease_time", "trans_id"], "dns.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "proto", "port", "query", "qclass", "qclass_name", "qtype", "qtype_name", "rcode", "rcode_name", "QR", "AA", "TC", "RD", "Z", "answers", "TTLs", "rejected"], "ftp.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "user", "password", "command", "arg","mime_type", "file_size", "reply_code", "reply_msg", "passive", "orig_h", "resp_h", "resp_p", "fuid"], "ssh.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "status", "direction", "client", "server", "resp_size"], "files.log":["ts", "fuid", "tx_hosts", "rx_hosts", "conn_uids", "source", "depth", "analyzers", "mime_type", "filename", "duration", "local_orig", "is_orig", "seen_bytes", "total_bytes", "missing_bytes", "overflow_bytes", "timedout","parent_fuid", "md5/sha1/sha256", "extracted"], "http.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "trans_depth", "method", "host", "uri", "referrer", "user_agent", "request_ body_len", "response_ body_len", "status_code", "status_msg", "info_code", "info_msg", "filename", "tags", "username", "password", "proxied", "orig_fuids", "orig_mime_types", "resp_fuids", "resp_mime_types"], "notice.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "fuid", "file_mime_type", "file_desc", "proto", "note", "msg", "sub", "src", "dst", "p", "n", "peer_descr", "actions", "suppress_for", "dropped"],"smtp.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "proto", "trans_depth", "helo", "mailfrom", "rcptto", "date", "from", "to", "in_reply_to", "subject", "x_originating_ip", "first_received", "second_received", "last_reply", "path", "user_agent", "tls", "fuids", "is_webmail"],"ssl.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "version", "cipher", "server_name", "session_id", "subject", "issuer_subject", "not_valid_before", "not_valid_after", "last_alert", "client_subject", "clnt_issuer_subject", "cer_hash", "validation_status"],"tunnel.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "tunnel_type", "action"],"weird.log":["ts", "uid", "id.orig_h", "id.orig_p", "id.resp_h", "id.resp_p", "name", "addl", "notice", "peer"]}
4 |
5 | path = "network"
6 | for filename in os.listdir(path):
7 | with open(path+"/"+filename.replace("log","csv"), 'w+', encoding='utf-8', newline='') as csvfile:
8 | w = csv.writer(csvfile, dialect='excel')
9 | with open(path+"/"+filename, encoding="utf8") as file:
10 | lines = file.read().split('\n')
11 | lines=lines[:-1]
12 | # print(lines)
13 | files = [dic[filename]]
14 | for line in lines:
15 | cells = []
16 | for item in line.split('\t'):
17 | if item=="-":
18 | cells.append(item.replace("-",""))
19 | else:
20 | cells.append(item)
21 | files.append(cells)
22 | w.writerows(files)
--------------------------------------------------------------------------------
/Scripts/System_Squid_LogToCSV.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Aug 09 11:50:42 2018
4 |
5 | @author: tienz
6 | """
7 |
8 | # Convert a log file into a CSV file so we can manipulate it with pandas
9 |
10 | import csv
11 | import os
12 |
13 | path = "access.log"
14 | result = [["time", "elapsed", "remotehost", "code/status", "bytes", "method", "URL", "rfc931", "peerstatus/peerhost", "type"]]
15 |
16 |
17 | with open('access.csv', 'w+', newline='') as csvfile:
18 | w = csv.writer(csvfile, dialect='excel')
19 | with open(path, encoding='utf-8') as file:
20 | lines = file.read().split('\n')
21 |
22 | for line in lines:
23 | ele = line.split(' ')
24 | item=[]
25 | for it in ele:
26 | if not it == "":
27 | if it == "-":
28 | item.append("")
29 | else:
30 | item.append(it)
31 | result.append(item)
32 | w.writerows(result)
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/Scripts/featureExtraction.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon Jul 30 15:39:02 2018
3 |
4 | @author: Cyber Defenders - Team Aladdin
5 | """
6 |
7 |
8 | import json
9 | import csv
10 | import os
11 |
12 |
13 | # Define feature header
14 | result = [['FileName', 'SectionAlignment', 'FileAlignment', 'SizeOfHeaders', 'TimeDateStamp', 'ImageBase', 'SizeOfImage', 'DllCharacteristics', 'Characteristics', 'HighEntropy', 'LowEntropy', 'TotalSuspiciousSections', 'TotalNonSuspiciousSections']]
15 |
16 | # Define standard sections
17 | standardSection = ['.text', '.rdata', '.data', '.rsrc']
18 |
19 | # Check whether an element exists in a list
20 | def checkExist(listSection, ele):
21 | for item in listSection:
22 | if item == ele:
23 | return True
24 | return False
25 | # Define the path of the folder which contains the JSON files
26 | path = 'zeus'
27 |
28 |
29 | for filename in os.listdir(path):
30 | file = open("./"+path+"/"+filename, 'r')
31 |
32 | for line in file:
33 | j = json.loads(line)
34 |
35 | # HighEntropy and LowEntropy Extraction
36 | highEntropy = 0
37 | lowEntropy = 0
38 | highest = 0
39 | lowest = 8
40 | for item in j['PE Sections']:
41 | if item['Entropy'] < lowest:
42 | lowest = item['Entropy']
43 | if item['Entropy'] > highest:
44 | highest = item['Entropy']
45 |
46 | if highest > 7:
47 | highEntropy = 1
48 | if lowest < 1:
49 | lowEntropy = 1
50 |
51 | # TotalSuspiciousSections and TotalNonSuspiciousSections extraction
52 | numberSuspicious = 0
53 | numberNonSuspicious = 0
54 | for item in j['PE Sections']:
55 | if checkExist(standardSection, item['Name']['Value']):
56 | numberNonSuspicious += 1
57 | else:
58 | numberSuspicious += 1
59 |
60 |
61 | #SectionAlignment Extraction
62 | sectionAlignment = j['OPTIONAL_HEADER']['SectionAlignment']['Value']
63 |
64 | #FileAlignment Extraction
65 | fileAlignment = j['OPTIONAL_HEADER']['FileAlignment']['Value']
66 |
67 | # SizeOfHeaders Extraction
68 | sizeOfHeader = j['OPTIONAL_HEADER']['SizeOfHeaders']['Value']
69 |
70 | timeStamp = j['FILE_HEADER']['TimeDateStamp']['Value']
71 |
72 | # ImageBase Extraction
73 | imageBase = j['OPTIONAL_HEADER']['ImageBase']['Value']
74 |
75 | # SizeOfImage Extraction
76 | sizeOfImage = j['OPTIONAL_HEADER']['SizeOfImage']['Value']
77 |
78 | #DllCharacteristics Extraction
79 | dllCharacteristics = j['OPTIONAL_HEADER']['DllCharacteristics']['Value']
80 |
81 | # Characteristics Extraction
82 | characteristics = j['FILE_HEADER']['Characteristics']['Value']
83 |
84 | row = [filename, sectionAlignment, fileAlignment, sizeOfHeader, timeStamp, imageBase, sizeOfImage, dllCharacteristics, characteristics, highEntropy, lowEntropy, numberSuspicious, numberNonSuspicious]
85 | # print(row)
86 | result.append(row)
87 |
88 | with open('Zeus.csv', 'w+', newline='') as f:
89 | thewriter = csv.writer(f)
90 |
91 | thewriter.writerows(result)
92 |
93 |
94 |
95 |
96 |
97 |
98 |
--------------------------------------------------------------------------------
/Scripts/html_Generator.py:
--------------------------------------------------------------------------------
1 | import gspread
2 | import pandas as pd
3 | from oauth2client.service_account import ServiceAccountCredentials
4 |
5 | # define the OAuth scopes the credentials may access
6 | scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
7 |
8 | # create credentials from json keyfile
9 | credentials = ServiceAccountCredentials.from_json_keyfile_name('your key file name', scope)
10 |
11 | # authorize the credential
12 | gc = gspread.authorize(credentials)
13 |
14 | # Open spreadsheet
15 | wks = gc.open('Your spreadsheet name').sheet1
16 |
17 | # Read each row of the spreadsheet
18 | for i in range(0,len(wks.get_all_records())):
19 | row = wks.get_all_records()[i]
20 |
21 | # data quality table
22 | df = pd.read_csv("../network/"+row["Name of dataset"].replace(" ","")+".csv")
23 | columns = pd.DataFrame(list(df.columns.values[1:]))
24 | #DataFrame with data types
25 | data_types = pd.DataFrame(df.dtypes, columns=['Data Type'])
26 |
27 | #DataFrame with Count
28 | data_count = pd.DataFrame(df.count(), columns=['Count'])
29 |
30 | #DataFrame with unique values
31 | unique_value_counts = pd.DataFrame(columns=['Unique Values'])
32 | for v in list(df.columns.values):
33 | unique_value_counts.loc[v] = [df[v].nunique()]
34 |
35 | missing_data_counts = pd.DataFrame(df.isnull().sum(), columns=['Missing Values'])
36 | data_quality_report = data_types.join(data_count).join(unique_value_counts).join(missing_data_counts)
37 |
38 | # Open a new html file
39 | f = open(row["Name of dataset"].replace(" ","")+".html",'w')
40 |
41 | '''
42 | Customize content of html files
43 | '''
44 | message = ""
45 | message += ""
46 | message += ""
47 | message += "Home"
48 | message += "" + row["Name of dataset"].upper() + "
"
49 | message += "Download: "+row["Name of dataset"]+" Zip File
"
50 | message += "Abstract
"
51 | message += ""
52 | message += ""
53 | message += ""
54 | message += "Number of Instances: | "
55 | message += ""+str(row['num of instances'])+" | "
56 | message += "Security Area: | "
57 | message += ""+row['Security Area']+" | "
58 | message += "
"
59 | message += ""
60 | message += "Number of Attributes: | "
61 | message += ""+str(row['Number of attributes'])+" | "
62 | message += "Date Donated: | "
63 | message += ""+ str(row['Data Donated'])+" | "
64 | message += "
"
65 | message += ""
66 | message += "Missing Values? | "
67 | message += ""+str(row['Missing data'])+" | "
68 | message += "Associated ML Tasks: | "
69 | message += ""+ str(row['Associated Tasks'])+" | "
70 | message += "
"
71 | message += "
"
72 | message += "
"
73 | message += "Source
"
74 |
75 | # Split elements of Source section
76 | source = row["Source"].split(",")
77 | for item in source:
78 | message += ""+item+"
"
79 | message += "Dataset Information
"
80 | message += ""+row["Information"]+"
"
81 | message += "Attribute Information
"
82 | message += ""
83 | message += data_quality_report.to_html()
84 | message += "
"
85 | message += "Relevant Papers
"
86 |
87 | # Split elements of the Relevant Papers section
88 | relevant = row["Relevant Papers"].split(";")
89 | for item in relevant:
90 | message += ""+item.split("|")[0]+""+item.split("|")[1]+"
"
91 | message += "Associate Data Science Notebook
"
92 | message += ""+row['Associate Data Science Notebook:']+""
93 | message += ""
94 | message += ""
95 |
96 | # Write the content into the file
97 | f.write(message)
98 |
99 | # Close the file after work
100 | f.close()
--------------------------------------------------------------------------------