├── .gitattributes
├── .gitignore
├── LICENSE
├── MalwareArtifacts.csv
├── README.md
├── cybermachine.py
├── extractPE.py
├── malwareML.py
├── requirements.txt
├── spam.csv
├── spamML.py
├── urlML.py
└── url_spam_classification.csv


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 | 
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 | 
111 | # SageMath parsed files
112 | *.sage.py
113 | 
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 | 
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 | 
127 | # Rope project settings
128 | .ropeproject
129 | 
130 | # mkdocs documentation
131 | /site
132 | 
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 | 
138 | # Pyre type checker
139 | .pyre/
140 | 
141 | # pytype static type analyzer
142 | .pytype/
143 | 
144 | # Cython debug symbols
145 | cython_debug/
146 | 
147 | # PyCharm
148 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
151 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
152 | #.idea/
153 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Emrah Yıldırım
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <h1 align="left">CyberMachine 🤖</h1>
  2 | <img width="1026" alt="Ekran Resmi 2022-04-14 18 33 25" src="https://user-images.githubusercontent.com/60710585/163424431-3da87b14-2053-4733-bb85-d1bf62b31690.png">
  3 | 
  4 | 
  5 | <!-- ABOUT THE PROJECT -->
  6 | 
  7 | <h2 align="left">About The Project 📰</h2>
  8 | 
  9 |  Detects cyber threats aimed at end users with machine learning. This tool can perform malware analysis of a given exe file and spam analysis of a given URL or e-mail message.
 10 |  
 11 | 
 12 | <br>
 13 | <!-- INSTALL -->
 14 | <h2 align="left">Installation ⏳</h2>
 15 | 
 16 | * Clone the repo
 17 |  
 18 | ```sh
 19 | git clone https://github.com/emr4h/CyberMachine.git
 20 | ```
 21 | 
 22 | <br>
 23 | <!-- GETTING STARTED -->
 24 | <h2 align="left">Getting Started 🕵️‍♂️</h2>
 25 | 
 26 |  * Go to the CyberMachine folder:
 27 | ```sh
 28 | cd CyberMachine
 29 | ```
 30 | 
 31 |  * Install the requirements:
 32 | 
 33 | ```sh
 34 | pip3 install -r requirements.txt
 35 | ```
 36 |  
 37 | <br>
 38 | <!-- USAGE EXAMPLES -->
 39 | <h2 align="left">Usage 👨🏻‍💻</h2>
 40 | 
 41 | 
 42 | * Then you can access the help menu with the command below:
 43 | 
 44 | ```sh
 45 | python3 cybermachine.py --help
 46 | ```
 47 | 
 48 | <br>
 49 | 
 50 | * Malware Analysis with ML :
 51 | 
 52 | ```sh
 53 | python3 cybermachine.py --exe <file_path> 
 54 | ```
 55 | 
 56 | https://user-images.githubusercontent.com/60710585/163443140-a43f407c-ade6-48e0-a87f-c431c2c4fb50.mp4
 57 | 
 58 | <br>
 59 | 
 60 | * Mail Analysis with ML :
 61 | 
 62 | ```sh
 63 | python3 cybermachine.py --mail <"message"> 
 64 | ```
 65 | 
 66 | https://user-images.githubusercontent.com/60710585/163433972-d560bfff-b8a6-4215-a502-041203244836.mp4
 67 | 
 68 | <br>
 69 | 
 70 | * Url Analysis with ML :
 71 | 
 72 | ```sh
 73 | python3 cybermachine.py --url <"link"> 
 74 | ```
 75 | 
 76 | https://user-images.githubusercontent.com/60710585/163436436-95537447-eb14-4c8a-8aa1-11e4cc86dc1a.mp4
 77 | 
 78 | <br>
 79 | 
 80 | <!-- Details -->
 81 | <h2 align="left">Details 👀</h2>
 82 | 
 83 | If you are curious about the machine learning applications, success rates and analysis approaches used in the project, you can review my repositories below.
 84 | 
 85 | <br>
 86 | * Malware Analysis with Machine Learning
 87 |  
 88 |    ```sh
 89 |    https://github.com/emr4h/Malware-Detection-Using-Machine-Learning
 90 |    ```
 91 | <br>
 92 | * Spam Analysis with Machine Learning
 93 |  
 94 |    ```sh
 95 |    https://github.com/emr4h/Spam-Email-and-Url-Detection-Using-Machine-Learning 
 96 |    ```
 97 | 
 98 | <br>
 99 | <!-- Support -->
100 | <h2 align="left">Support 🎗</h2>
101 | 
102 | If you like the project, please give a star ⭐️ and don't forget to buy me a coffee ☕️ 
103 | 
104 | <p align="left"><a href="https://www.buymeacoffee.com/emr4h"> <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" height="50" width="210" alt="emr4h" /></a></p><br>
105 | 
106 | 
107 | <h3 align="left">Follow me:</h3>
108 | <p align="left">
109 | <a href="https://twitter.com/emrahyldrw" target="blank"><img align="center" src="https://raw.githubusercontent.com/rahuldkjain/github-profile-readme-generator/master/src/images/icons/Social/twitter.svg" alt="emrahyldrw" height="30" width="40" /></a>
110 | <a href="https://linkedin.com/in/emr4h" target="blank"><img align="center" src="https://raw.githubusercontent.com/rahuldkjain/github-profile-readme-generator/master/src/images/icons/Social/linked-in-alt.svg" alt="emr4h" height="30" width="40" /></a>
111 | <a href="https://instagram.com/sapkalihacker" target="blank"><img align="center" src="https://raw.githubusercontent.com/rahuldkjain/github-profile-readme-generator/master/src/images/icons/Social/instagram.svg" alt="sapkalihacker" height="30" width="40" /></a>
112 | </p>
113 | 
114 | 


--------------------------------------------------------------------------------
/cybermachine.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import argparse
 3 | import subprocess
 4 | import random
 5 | from pyfiglet import Figlet
 6 | from extractPE import fileExtract
 7 | from malwareML import machineLearnMalware
 8 | from spamML import machineLearnSpam
 9 | from urlML import machineLearnUrl
10 | 
11 | 
# --- Start-up banner ---------------------------------------------------------
# NOTE: these statements sit at module top level, so they run whenever the
# module is loaded, not only when it is executed as a script.
print("\n\n\n")

# Figlet font names; one is picked at random so the banner differs per run.
fontList = ["big","bulbhead","roman","epic","larry3d","speed","nancyj","stampatello","smslant","slscript","serifcap","rounded","puffy","o8","letters","colossal","basic"]
fontType = random.choice(fontList)
f = Figlet(font=fontType)
print(f.renderText('Cyber Machine'))

print("by emr4h\n")

# --- Command-line interface --------------------------------------------------
# `prog` is the name argparse prefixes to its error messages. It previously
# read "hackwall\n" (a different tool name with an embedded newline), which
# produced broken error output. The explicit `usage` text replaces argparse's
# auto-generated usage line.
parser = argparse.ArgumentParser(
    prog="cybermachine.py",
    description="Threat Analysis Tool for End User",
    usage="\n\n Malware Analysis with ML:  python3 cybermachine.py --exe <file_path> \n Email Analysis with ML:  python3 cybermachine.py --mail <string> \n Url Analysis with ML: python3 cybermachine.py --url <string>",
)
# All three options are optional and independent; any combination may be given.
parser.add_argument("--exe", help = "Malware Analysis with ML, give value in exe file type")
parser.add_argument("--mail", type=str, help = "Email Spam Analysis with ML, give value in string type ")
parser.add_argument("--url", type=str, help = "Url Spam Analysis with ML, give value in string type ")


# Parsed once at module level; the entry point below reads `args`.
args = parser.parse_args()
28 | 
29 | 
def analysisMalware(argument):
    """Run the malware pipeline on one file and print the ensemble verdict.

    The PE features of the file are written to ``inputData.csv`` by
    ``fileExtract``; ``machineLearnMalware`` then returns the sum of its three
    classifiers' predictions for that row. A sum of 2 or more is reported as
    malware.

    Args:
        argument: Path of the executable to analyse (value of ``--exe``).

    Raises:
        Whatever ``fileExtract`` or ``machineLearnMalware`` raise; the
        temporary CSV is removed before the exception propagates.
    """
    # Function-scope import: the module does not import `os` at file level.
    import os

    try:
        fileExtract(argument)
        result = machineLearnMalware()
        if result >= 2:
            print("ML Prediction --> Malware.\n")
        else:
            print("ML Prediction --> Secure.\n")
    finally:
        # Portable replacement for shelling out to `rm`, which does not exist
        # on Windows. Running in `finally` also cleans up after a failed run.
        try:
            os.remove("inputData.csv")
        except FileNotFoundError:
            # Nothing was written (e.g. extraction failed before creating it).
            pass
38 | 
39 | 
def analysisSpam(argument):
    """Classify an e-mail message and print the ensemble verdict.

    Args:
        argument: Message text to analyse (value of ``--mail``).

    ``machineLearnSpam`` returns the sum of its three classifiers'
    predictions; a sum of 2 or more is reported as spam.
    """
    votes = machineLearnSpam(argument)
    verdict = "ML Prediction --> Spam.\n" if votes >= 2 else "ML Prediction --> Secure.\n"
    print(verdict)
46 | 
def analysisUrl(argument):
    """Classify a URL and print the ensemble verdict.

    Args:
        argument: URL string to analyse (value of ``--url``).

    ``machineLearnUrl`` returns the sum of its three classifiers' predictions;
    fewer than 2 positive votes is reported as secure.
    """
    voteTotal = machineLearnUrl(argument)
    if voteTotal < 2:
        print("ML Prediction --> Secure.\n")
        return
    print("ML Prediction --> Spam.\n")
53 | 
54 | 
if __name__ == '__main__':
    # Table-driven dispatch: each supplied (truthy) option is handed to its
    # analysis routine, in the fixed order exe -> mail -> url. Options that
    # were not given on the command line are skipped.
    for optionValue, runAnalysis in (
        (args.exe, analysisMalware),
        (args.mail, analysisSpam),
        (args.url, analysisUrl),
    ):
        if optionValue:
            runAnalysis(optionValue)
65 | 
66 | 


--------------------------------------------------------------------------------
/extractPE.py:
--------------------------------------------------------------------------------
 1 | import pefile
 2 | import csv
 3 | 
 4 | def fileExtract(data):
 5 |     print("Extracting the PE information of the file...")
 6 |     header =["AddressOfEntryPoint","MajorLinkerVersion","MajorImageVersion","MajorOperatingSystemVersion","DllCharacteristics","SizeOfStackReserve","NumberOfSections","ResourceSize","IfMalware"]
 7 |     with open('inputData.csv', 'w', encoding='UTF8', newline='') as f:
 8 |         writer = csv.writer(f)
 9 | 
10 |         # header bilgilerini ekledik :
11 |         writer.writerow(header)
12 | 
13 |         # zararlı yazılımların bilgilerini ekledik :
14 |         pe = pefile.PE(data)
15 |         a = str(pe.OPTIONAL_HEADER.AddressOfEntryPoint)
16 |         b = str(pe.OPTIONAL_HEADER.MajorLinkerVersion)
17 |         c = str(pe.OPTIONAL_HEADER.MajorImageVersion)
18 |         d = str(pe.OPTIONAL_HEADER.MajorOperatingSystemVersion)
19 |         e = str(pe.OPTIONAL_HEADER.DllCharacteristics)
20 |         f = str(pe.OPTIONAL_HEADER.SizeOfStackReserve)
21 |         g = str(pe.FILE_HEADER.NumberOfSections)
22 |         h = str(pe.OPTIONAL_HEADER.DATA_DIRECTORY[2].Size)
23 |         i = " " # zararlı bilgisini gösterir.
24 |         inputData = [a,b,c,d,e,f,g,h,i]
25 |         writer.writerow(inputData)
26 |     print("The file was successfully extracted.")
27 | 
28 | 


--------------------------------------------------------------------------------
/malwareML.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn import tree
 4 | from sklearn.ensemble import RandomForestClassifier
 5 | from sklearn.neighbors import KNeighborsClassifier
 6 | 
 7 | 
 8 | def machineLearnMalware() :
 9 | 
10 |     dataSet = pd.read_csv('../CyberMachine/MalwareArtifacts.csv')
11 |     fileData = pd.read_csv('../CyberMachine/inputData.csv')
12 | 
13 |     features = dataSet.iloc[:,[0,1,2,3,4,5,6,7]].values
14 |     ifMalware = dataSet.iloc[:,8].values
15 | 
16 |     fileFeatures = fileData.iloc[:,[0,1,2,3,4,5,6,7]].values
17 | 
18 |     print("The model is training using a total of 137444 data ...\n")
19 |     print("Prediction using Decision Trees ...\n")
20 | 
21 |     dtModel = tree.DecisionTreeClassifier() 
22 |     dtModel.fit(features, ifMalware) 
23 | 
24 |     dtpredict = dtModel.predict(fileFeatures) 
25 |     print(dtpredict)
26 |     print("\n")
27 | 
28 |     print("Prediction using Random Forest ...\n")
29 | 
30 |     rfModel = RandomForestClassifier() 
31 |     rfModel.fit(features, ifMalware) 
32 |     rfpredict = rfModel.predict(fileFeatures)  
33 |     print(rfpredict)
34 |     print("\n")
35 | 
36 | 
37 |     print("Prediction using Kneighbors ...\n")
38 | 
39 |     knnModel = KNeighborsClassifier(n_neighbors=1)
40 |     knnModel.fit(features, ifMalware)
41 |     knpredict = knnModel.predict(fileFeatures)  
42 |     print(knpredict)
43 |     print("\n")
44 | 
45 |     predict = int(knpredict + dtpredict + rfpredict)
46 |     return predict
47 | 
48 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.21.2
2 | pandas==1.3.2
3 | pefile==2021.5.24
4 | pyfiglet==0.7
5 | scikit_learn==1.0.2
6 | 


--------------------------------------------------------------------------------
/spamML.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn import tree
 4 | from sklearn.ensemble import RandomForestClassifier
 5 | from sklearn import svm
 6 | from sklearn.feature_extraction.text import CountVectorizer
 7 | 
 8 | 
 9 | def machineLearnSpam(message):
10 | 
11 |     dataSet = pd.read_csv('../CyberMachine/spam.csv')
12 |     dataSet.Category = dataSet.Category.apply(lambda x: 1 if x == 'spam' else 0)
13 |     features = dataSet.iloc[:,1] 
14 |     ifSpam = dataSet.iloc[:,0] 
15 |     cv = CountVectorizer()
16 |     features = cv.fit_transform(features)
17 |     userInput = cv.transform([message])
18 |     print("The model is training using a total of 5572 data...\n")
19 | 
20 |     print("Prediction using Decision Trees ...")
21 | 
22 |     dtModel = tree.DecisionTreeClassifier() 
23 |     dtModel.fit(features, ifSpam)
24 |     dtPredict = dtModel.predict(userInput)
25 |     print(dtPredict) 
26 |     print("\n")
27 | 
28 |     print("Prediction using Random Forest ...")
29 | 
30 |     rfModel = RandomForestClassifier() 
31 |     rfModel.fit(features, ifSpam) 
32 |     rfPredict = rfModel.predict(userInput)
33 |     print(rfPredict)
34 |     print("\n")
35 | 
36 | 
37 |     print("Prediction using Support Vector Machine ...")
38 | 
39 |     svcModel = svm.SVC()
40 |     svcModel.fit(features, ifSpam)
41 |     svcPredict = svcModel.predict(userInput)
42 |     print(svcPredict)
43 |     print("\n")
44 | 
45 |     predict = int(svcPredict + dtPredict + rfPredict)
46 |     return predict
47 | 
48 | 


--------------------------------------------------------------------------------
/urlML.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn.feature_extraction.text import CountVectorizer
 4 | from sklearn.naive_bayes import MultinomialNB
 5 | from sklearn.svm import LinearSVC
 6 | from sklearn.linear_model import SGDClassifier
 7 | 
 8 | 
 9 | def machineLearnUrl(userInput):
10 | 
11 |     url = pd.read_csv('../CyberMachine/url_spam_classification.csv')
12 | 
13 |     url['is_spam'] = url.is_spam.apply(str)
14 |     url['is_spam'] = url['is_spam'].apply(lambda x : 1 if x == "True" in x else 0)
15 | 
16 |     urls = url.iloc[:,0]
17 |     ifSpam = url.iloc[:,1]
18 | 
19 |     def extractUrl(data):
20 |         url = str(data)
21 |         extractSlash = url.split('/')
22 |         result = []
23 |         
24 |         for i in extractSlash:
25 |             extractDash = str(i).split('-')
26 |             dotExtract = []
27 |             
28 |             for j in range(0,len(extractDash)):
29 |                 extractDot = str(extractDash[j]).split('.')
30 |                 dotExtract += extractDot
31 |                 
32 |             result += extractDash + dotExtract
33 |         result = list(set(result))
34 | 
35 |         return result
36 | 
37 |     cv = CountVectorizer(tokenizer=extractUrl)
38 | 
39 |     print("The model is training using a total of 148303 url data ...\n")
40 | 
41 |     features = cv.fit_transform(urls)
42 |     features_test = cv.transform([userInput])
43 | 
44 |     print("Prediction using Stochastic Gradient Descent ...")
45 | 
46 |     sgdcModel = SGDClassifier()
47 |     sgdcModel.fit(features, ifSpam)
48 |     sgdcPredict = sgdcModel.predict(features_test)
49 |     print(sgdcPredict) 
50 |     print("\n")
51 | 
52 |     print("Prediction using Decision Trees ...")
53 | 
54 |     nbModel = MultinomialNB()
55 |     nbModel.fit(features, ifSpam)
56 |     nbPredict = nbModel.predict(features_test)
57 |     print(nbPredict) 
58 |     print("\n")
59 | 
60 |     print("Prediction using Linear Support Vector Machine ...")
61 | 
62 |     lsvcModel = LinearSVC()
63 |     lsvcModel.fit(features, ifSpam)
64 |     lsvcPredict = lsvcModel.predict(features_test)
65 |     print(lsvcPredict) 
66 |     print("\n")
67 | 
68 |     predict = int(lsvcPredict + nbPredict + sgdcPredict)
69 |     return predict
70 | 
71 | 


--------------------------------------------------------------------------------