├── .gitattributes ├── .gitignore ├── LICENSE ├── MalwareArtifacts.csv ├── README.md ├── cybermachine.py ├── extractPE.py ├── malwareML.py ├── requirements.txt ├── spam.csv ├── spamML.py ├── urlML.py └── url_spam_classification.csv /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | #.idea/ 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Emrah Yıldırım 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# CyberMachine 🤖

2 | *(screenshot: Ekran Resmi 2022-04-14 18 33 25)* 3 | 4 | 5 | 6 | 7 |

## About The Project 📰

8 | 9 | Detects cyber threats to the end user with machine learning. The tool can perform malware analysis on a given EXE file and spam analysis on a given URL or e-mail message. 10 | 11 | 12 |
13 | 14 |

## Installation ⏳

15 | 16 | * Clone the repo 17 | 18 | ```sh 19 | git clone https://github.com/emr4h/CyberMachine.git 20 | ``` 21 | 22 |
23 | 24 |

## Getting Started 🕵️‍♂️

25 | 26 | * Go to the CyberMachine folder: 27 | ```sh 28 | cd CyberMachine 29 | ``` 30 | 31 | * Install the requirements 32 | 33 | ```sh 34 | pip3 install -r requirements.txt 35 | ``` 36 | 37 |
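Optionally, you can install the requirements into a virtual environment so they stay isolated from your system Python. A minimal sketch (the environment name `.venv` is just an example, not part of the project):

```sh
# create and activate an isolated environment (the name is arbitrary)
python3 -m venv .venv
source .venv/bin/activate

# install the pinned dependencies listed in requirements.txt
pip3 install -r requirements.txt
```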
38 | 39 |

## Usage 👨🏻‍💻

40 | 41 | 42 | * Then you can access the help menu with the command below: 43 | 44 | ```sh 45 | python3 cybermachine.py --help 46 | ``` 47 | 48 |
49 | 50 | * Malware Analysis with ML : 51 | 52 | ```sh 53 | python3 cybermachine.py --exe <file.exe> 54 | ``` 55 | 56 | https://user-images.githubusercontent.com/60710585/163443140-a43f407c-ade6-48e0-a87f-c431c2c4fb50.mp4 57 | 58 |
59 | 60 | * Mail Analysis with ML : 61 | 62 | ```sh 63 | python3 cybermachine.py --mail <"message"> 64 | ``` 65 | 66 | https://user-images.githubusercontent.com/60710585/163433972-d560bfff-b8a6-4215-a502-041203244836.mp4 67 | 68 |
69 | 70 | * URL Analysis with ML : 71 | 72 | ```sh 73 | python3 cybermachine.py --url <"link"> 74 | ``` 75 | 76 | https://user-images.githubusercontent.com/60710585/163436436-95537447-eb14-4c8a-8aa1-11e4cc86dc1a.mp4 77 | 78 |
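For example, assuming a PE file named `sample.exe` in the current directory (the file name, message and link below are placeholders, not files shipped with this repo):

```sh
python3 cybermachine.py --exe sample.exe
python3 cybermachine.py --mail "Congratulations, you won a free prize!"
python3 cybermachine.py --url "http://example.com/free-prize"
```

Each command prints the individual model predictions followed by a final `ML Prediction --> ...` verdict.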
79 | 80 | 81 |

## Details 👀

82 | 83 | If you are curious about the machine learning applications, success rates and analysis approaches used in the project, you can review my repositories below. 84 | 85 |
86 | * Malware Analysis with Machine Learning 87 | 88 | ```sh 89 | https://github.com/emr4h/Malware-Detection-Using-Machine-Learning 90 | ``` 91 |
92 | * Spam Analysis with Machine Learning 93 | 94 | ```sh 95 | https://github.com/emr4h/Spam-Email-and-Url-Detection-Using-Machine-Learning 96 | ``` 97 | 98 |
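In short, each analysis trains three classifiers on the bundled dataset and flags the input only when at least two of the three models vote "malicious"/"spam" (`cybermachine.py` checks `result >= 2`). A minimal, self-contained sketch of that 2-of-3 majority vote on toy data (the feature vectors and labels below are purely illustrative):

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Toy training data: each row is a feature vector; label 1 = malicious/spam, 0 = benign.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 0, 1, 1])
sample = np.array([[1, 1]])  # the input being analysed

# The same three model families that malwareML.py uses.
models = [
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    KNeighborsClassifier(n_neighbors=1),
]

# Each model casts a 0/1 vote; the input is flagged only on a 2-of-3 majority.
votes = sum(int(m.fit(X, y).predict(sample)[0]) for m in models)
print("ML Prediction --> Malware." if votes >= 2 else "ML Prediction --> Secure.")
```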
99 | 100 |

## Support 🎗

101 | 102 | If you like the project, please give a star ⭐️ and don't forget to buy me a coffee ☕️ 103 | 104 |

emr4h


105 | 106 | 107 |

Follow me:

108 |

109 | emrahyldrw 110 | emr4h 111 | sapkalihacker 112 |

113 | 114 | -------------------------------------------------------------------------------- /cybermachine.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import os 4 | import random 5 | from pyfiglet import Figlet 6 | from extractPE import fileExtract 7 | from malwareML import machineLearnMalware 8 | from spamML import machineLearnSpam 9 | from urlML import machineLearnUrl 10 | 11 | 12 | print("\n\n\n") 13 | 14 | fontList = ["big","bulbhead","roman","epic","larry3d","speed","nancyj","stampatello","smslant","slscript","serifcap","rounded","puffy","o8","letters","colossal","basic"] 15 | fontType = random.choice(fontList)  # pick a random banner font for each run 16 | f = Figlet(font=fontType) 17 | print(f.renderText('Cyber Machine')) 18 | 19 | print("by emr4h\n") 20 | 21 | parser = argparse.ArgumentParser(prog="cybermachine", description="Threat Analysis Tool for End Users", usage="\n\n Malware Analysis with ML: python3 cybermachine.py --exe <file.exe>\n Email Analysis with ML: python3 cybermachine.py --mail <message>\n URL Analysis with ML: python3 cybermachine.py --url <link>") 22 | parser.add_argument("--exe", help = "Malware analysis with ML; expects the path of an .exe file") 23 | parser.add_argument("--mail", type=str, help = "Email spam analysis with ML; expects the message as a string") 24 | parser.add_argument("--url", type=str, help = "URL spam analysis with ML; expects the link as a string") 25 | 26 | 27 | args = parser.parse_args() 28 | 29 | 30 | def analysisMalware(argument): 31 | fileExtract(argument) 32 | result = machineLearnMalware() 33 | if(result >= 2):  # at least 2 of the 3 classifiers voted "malware" 34 | print("ML Prediction --> Malware.\n") 35 | else: 36 | print("ML Prediction --> Secure.\n") 37 | os.remove("inputData.csv")  # clean up the temporary feature file 38 | 39 | 40 | def analysisSpam(argument): 41 | result = machineLearnSpam(argument) 42 | if(result >= 2):  # 2-of-3 majority vote 43 | print("ML Prediction --> Spam.\n") 44 | else: 45 | print("ML Prediction --> Secure.\n") 46 | 47 | def analysisUrl(argument): 48 | result = machineLearnUrl(argument) 49 | if(result >= 2):  # 2-of-3 majority vote 50 | print("ML Prediction --> Spam.\n") 51 | else: 52 | print("ML Prediction --> Secure.\n") 53 | 54 | 55 | if __name__=='__main__': 56 | 57 | if(args.exe): 58 | analysisMalware(args.exe) 59 | 60 | if(args.mail): 61 | analysisSpam(args.mail) 62 | 63 | if(args.url): 64 | analysisUrl(args.url) 65 | 66 | -------------------------------------------------------------------------------- /extractPE.py: -------------------------------------------------------------------------------- 1 | import pefile 2 | import csv 3 | 4 | def fileExtract(data): 5 | print("Extracting the PE information of the file...") 6 | header = ["AddressOfEntryPoint","MajorLinkerVersion","MajorImageVersion","MajorOperatingSystemVersion","DllCharacteristics","SizeOfStackReserve","NumberOfSections","ResourceSize","IfMalware"] 7 | with open('inputData.csv', 'w', encoding='UTF8', newline='') as csvfile: 8 | writer = csv.writer(csvfile) 9 | 10 | # write the header row: 11 | writer.writerow(header) 12 | 13 | # extract the PE header features of the input file: 14 | pe = pefile.PE(data) 15 | a = str(pe.OPTIONAL_HEADER.AddressOfEntryPoint) 16 | b = str(pe.OPTIONAL_HEADER.MajorLinkerVersion) 17 | c = str(pe.OPTIONAL_HEADER.MajorImageVersion) 18 | d = str(pe.OPTIONAL_HEADER.MajorOperatingSystemVersion) 19 | e = str(pe.OPTIONAL_HEADER.DllCharacteristics) 20 | f = str(pe.OPTIONAL_HEADER.SizeOfStackReserve) 21 | g = str(pe.FILE_HEADER.NumberOfSections) 22 | h = str(pe.OPTIONAL_HEADER.DATA_DIRECTORY[2].Size) 23 | i = " " # IfMalware label; left blank because it is unknown for the input file
24 | inputData = [a,b,c,d,e,f,g,h,i] 25 | writer.writerow(inputData) 26 | print("The file was successfully extracted.") 27 | 28 | -------------------------------------------------------------------------------- /malwareML.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import tree 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.neighbors import KNeighborsClassifier 6 | 7 | 8 | def machineLearnMalware() : 9 | 10 | dataSet = pd.read_csv('../CyberMachine/MalwareArtifacts.csv') 11 | fileData = pd.read_csv('../CyberMachine/inputData.csv') 12 | 13 | features = dataSet.iloc[:,[0,1,2,3,4,5,6,7]].values 14 | ifMalware = dataSet.iloc[:,8].values 15 | 16 | fileFeatures = fileData.iloc[:,[0,1,2,3,4,5,6,7]].values 17 | 18 | print("Training the models on a total of 137,444 samples ...\n") 19 | print("Prediction using Decision Trees ...\n") 20 | 21 | dtModel = tree.DecisionTreeClassifier() 22 | dtModel.fit(features, ifMalware) 23 | 24 | dtpredict = dtModel.predict(fileFeatures) 25 | print(dtpredict) 26 | print("\n") 27 | 28 | print("Prediction using Random Forest ...\n") 29 | 30 | rfModel = RandomForestClassifier() 31 | rfModel.fit(features, ifMalware) 32 | rfpredict = rfModel.predict(fileFeatures) 33 | print(rfpredict) 34 | print("\n") 35 | 36 | 37 | print("Prediction using K-Nearest Neighbors ...\n") 38 | 39 | knnModel = KNeighborsClassifier(n_neighbors=1) 40 | knnModel.fit(features, ifMalware) 41 | knpredict = knnModel.predict(fileFeatures) 42 | print(knpredict) 43 | print("\n") 44 | 45 | predict = int(knpredict + dtpredict + rfpredict)  # sum of the three 0/1 votes 46 | return predict 47 | 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.2 2 | pandas==1.3.2 3 | pefile==2021.5.24 4 | pyfiglet==0.7 5 | scikit_learn==1.0.2 6 | -------------------------------------------------------------------------------- /spamML.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn import tree 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn import svm 6 | from sklearn.feature_extraction.text import CountVectorizer 7 | 8 | 9 | def machineLearnSpam(message): 10 | 11 | dataSet = pd.read_csv('../CyberMachine/spam.csv') 12 | dataSet.Category = dataSet.Category.apply(lambda x: 1 if x == 'spam' else 0)  # map labels: 'spam' -> 1, 'ham' -> 0 13 | features = dataSet.iloc[:,1] 14 | ifSpam = dataSet.iloc[:,0] 15 | cv = CountVectorizer() 16 | features = cv.fit_transform(features) 17 | userInput = cv.transform([message]) 18 | print("Training the models on a total of 5,572 messages ...\n") 19 | 20 | print("Prediction using Decision Trees ...") 21 | 22 | dtModel = tree.DecisionTreeClassifier() 23 | dtModel.fit(features, ifSpam) 24 | dtPredict = dtModel.predict(userInput) 25 | print(dtPredict) 26 | print("\n") 27 | 28 | print("Prediction using Random Forest ...") 29 | 30 | rfModel = RandomForestClassifier() 31 | rfModel.fit(features, ifSpam) 32 | rfPredict = rfModel.predict(userInput) 33 | print(rfPredict) 34 | print("\n") 35 | 36 | 37 | print("Prediction using Support Vector Machine ...") 38 | 39 | svcModel = svm.SVC() 40 | svcModel.fit(features, ifSpam) 41 | svcPredict = svcModel.predict(userInput) 42 | print(svcPredict) 43 | print("\n") 44 | 45 | predict = int(svcPredict + dtPredict + rfPredict)  # sum of the three 0/1 votes 46 | return predict
47 | 48 | -------------------------------------------------------------------------------- /urlML.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.naive_bayes import MultinomialNB 5 | from sklearn.svm import LinearSVC 6 | from sklearn.linear_model import SGDClassifier 7 | 8 | 9 | def machineLearnUrl(userInput): 10 | 11 | url = pd.read_csv('../CyberMachine/url_spam_classification.csv') 12 | 13 | url['is_spam'] = url.is_spam.apply(str) 14 | url['is_spam'] = url['is_spam'].apply(lambda x: 1 if x == "True" else 0)  # map the "True"/"False" labels to 1/0 15 | 16 | urls = url.iloc[:,0] 17 | ifSpam = url.iloc[:,1] 18 | 19 | def extractUrl(data):  # custom tokenizer: split a URL on '/', '-' and '.' 20 | url = str(data) 21 | extractSlash = url.split('/') 22 | result = [] 23 | 24 | for i in extractSlash: 25 | extractDash = str(i).split('-') 26 | dotExtract = [] 27 | 28 | for j in range(0,len(extractDash)): 29 | extractDot = str(extractDash[j]).split('.') 30 | dotExtract += extractDot 31 | 32 | result += extractDash + dotExtract 33 | result = list(set(result))  # deduplicate the tokens 34 | 35 | return result 36 | 37 | cv = CountVectorizer(tokenizer=extractUrl) 38 | 39 | print("Training the models on a total of 148,303 URLs ...\n") 40 | 41 | features = cv.fit_transform(urls) 42 | features_test = cv.transform([userInput]) 43 | 44 | print("Prediction using Stochastic Gradient Descent ...") 45 | 46 | sgdcModel = SGDClassifier() 47 | sgdcModel.fit(features, ifSpam) 48 | sgdcPredict = sgdcModel.predict(features_test) 49 | print(sgdcPredict) 50 | print("\n") 51 | 52 | print("Prediction using Multinomial Naive Bayes ...") 53 | 54 | nbModel = MultinomialNB() 55 | nbModel.fit(features, ifSpam) 56 | nbPredict = nbModel.predict(features_test) 57 | print(nbPredict) 58 | print("\n") 59 | 60 | print("Prediction using Linear Support Vector Machine ...") 61 | 62 | lsvcModel = LinearSVC() 63 | lsvcModel.fit(features, ifSpam) 64 | lsvcPredict = lsvcModel.predict(features_test) 65 | print(lsvcPredict) 66 | print("\n") 67 | 68 | predict = int(lsvcPredict + nbPredict + sgdcPredict)  # sum of the three 0/1 votes 69 | return predict 70 | 71 | --------------------------------------------------------------------------------