├── Mal-detection-learning.py
├── Mal-detection.py
├── README.md
├── antivirusXml.py
├── classifier
├── classifier.pkl
└── features.pkl
├── data.csv
├── requirements.txt
└── virustotal.py
/Mal-detection-learning.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """antivirus-learning-phase.ipynb
3 | """
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import seaborn as sns
8 | import sklearn.ensemble as ske
9 | from sklearn.feature_selection import SelectFromModel
10 | import matplotlib.pyplot as plt
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
13 | from sklearn.tree import DecisionTreeClassifier
14 | from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
15 | from sklearn.neighbors import KNeighborsClassifier
16 | from sklearn.linear_model import SGDClassifier
17 | from sklearn.naive_bayes import GaussianNB
18 | from sklearn import preprocessing
19 | from sklearn import utils
20 | import joblib
21 | import sys
22 | import pickle
23 | sys.modules['sklearn.externals.joblib'] = joblib
24 |
25 |
26 |
# Load the PE-header dataset; fields in data.csv are separated by "|".
data = pd.read_csv('data.csv', sep="|")
data.head()          # notebook leftover: the result is discarded when run as a script
data.isnull().sum()  # notebook leftover: the result is discarded when run as a script

colomuns = [
    "LoaderFlags", "NumberOfRvaAndSizes", "SectionsNb", "SectionsMeanEntropy",
    "SectionsMinEntropy", "SectionsMaxEntropy", "SectionsMeanRawsize",
    "SectionMaxRawsize", "SectionsMeanVirtualsize", "SectionsMinVirtualsize",
    "SectionMaxVirtualsize", "ImportsNbDLL", "ImportsNb", "ImportsNbOrdinal",
    "ExportNb", "ResourcesNb", "ResourcesMeanEntropy", "ResourcesMinEntropy",
    "ResourcesMaxEntropy", "ResourcesMeanSize", "ResourcesMinSize",
    "ResourcesMaxSize", "LoadConfigurationSize", "VersionInformationSize",
    "legitimate",
]

# Fill missing values in each listed feature with the (2-decimal) mean of THAT
# column.  The previous loop reassigned `data = data.fillna(m)`, which filled
# every NaN in the whole frame with the mean of the first listed column.
feature_columns = [c for c in colomuns if c != "legitimate"]
column_means = data[feature_columns].mean().round(2)
data[feature_columns] = data[feature_columns].fillna(column_means)

# A missing label cannot be imputed with an average; drop such rows instead.
data = data.dropna(subset=["legitimate"])

# Feature matrix: everything except the identifiers and the label.
X = data.drop(['Name', 'md5', 'legitimate'], axis=1).values
y = data['legitimate'].values

data.dtypes  # notebook leftover: the result is discarded when run as a script

# Class-balance plot of the label column.
sns.countplot(x='legitimate', data=data)
44 |
# Fit an extra-trees ensemble on the encoded labels to obtain feature importances.
ex = ExtraTreesClassifier()
lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y)
fsel = ex.fit(X, y_transformed)

# Keep only the columns that SelectFromModel retains from the fitted ensemble.
model = SelectFromModel(fsel, prefit=True)
X_new = model.transform(X)
nb_features = X_new.shape[1]

# Hold out 20% of the reduced data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.2)

print('%i features identified as important:' % nb_features)

# Indices of the nb_features most important columns, most important first.
importances = fsel.feature_importances_
indices = np.argsort(importances)[::-1][:nb_features]
for rank, column_index in enumerate(indices, start=1):
    # The +2 offset maps an X column back to a data column; it assumes 'Name'
    # and 'md5' are the first two columns of data.csv -- TODO confirm.
    print("%d. feature %s (%f)" % (rank, data.columns[2 + column_index], importances[column_index]))

# Names of the selected features, in dataset column order.
features = [data.columns[2 + column_index] for column_index in sorted(indices)]
66 |
# Candidate classifiers, keyed by the name used in the printed report.
algorithms = {
    "DecisionTree": DecisionTreeClassifier(max_depth=10),
    "RandomForest": RandomForestClassifier(n_estimators=50),
    "AdaBoost": AdaBoostClassifier(n_estimators=100),
    "GNB": GaussianNB(),
}

results = {}
accuracy_test = []
model = []
print("\nNow testing algorithms")
for algo, clf in algorithms.items():
    # Train on the label-encoded training targets.
    lab = preprocessing.LabelEncoder()
    y_transformed = lab.fit_transform(y_train)
    clf.fit(X_train, y_transformed)

    # Evaluate on the held-out split.
    pred = clf.predict(X_test)
    score = clf.score(X_test, y_test)
    results[algo] = score
    print("%s : %f %%" % (algo, score*100))

    acc = accuracy_score(pred, y_test)
    accuracy_test.append(acc)
    print('Test Accuracy :\033[32m \033[01m {:.5f}% \033[30m \033[0m'.format(acc*100))

    print('\033[01m Classification_report \033[0m')
    print(classification_report(y_test, pred))

    # Confusion matrix drawn as a heatmap of fractions of all test samples.
    print('\033[01m Confusion_matrix \033[0m')
    cf_matrix = confusion_matrix(y_test, pred)
    plot_ = sns.heatmap(cf_matrix/np.sum(cf_matrix), annot=True, fmt='0.2%')
    plt.show()
    print('\033[31m###################- End -###################\033[0m')

# The winner is the algorithm with the highest test score.
winner = max(results, key=results.get)
print('\nWinner algorithm is %s with a %f %% success' % (winner, results[winner]*100))
100 |
# Save the algorithm and the feature list for later predictions.
# Mal-detection.py loads both files from a "classifier" directory next to the
# scripts, so write them there instead of into the current working directory.
import os

classifier_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'classifier')
os.makedirs(classifier_dir, exist_ok=True)

print('Saving algorithm and feature list in classifier directory...')
joblib.dump(algorithms[winner], os.path.join(classifier_dir, 'classifier.pkl'))
# The context manager guarantees the feature list is flushed and closed.
with open(os.path.join(classifier_dir, 'features.pkl'), 'wb') as features_file:
    pickle.dump(features, features_file)
print('Saved')
106 |
--------------------------------------------------------------------------------
/Mal-detection.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | import pefile
3 | import os
4 | import array
5 | import math
6 | import pickle
7 | import joblib
8 | import sys
9 | import argparse
10 |
def get_entropy(data):
    """Return the base-2 Shannon entropy of *data*.

    *data* may be a bytes-like object (items are ints) or a string (items are
    converted with ``ord``).  Empty input yields ``0.0``.
    """
    total = len(data)
    if total == 0:
        return 0.0

    # One counter per possible byte value.
    counts = array.array('L', [0] * 256)
    for item in data:
        index = item if isinstance(item, int) else ord(item)
        counts[index] += 1

    entropy = 0
    for occurrences in counts:
        if not occurrences:
            continue
        probability = float(occurrences) / total
        entropy -= probability * math.log(probability, 2)

    return entropy
25 |
def get_resources(pe):
    """Collect ``[entropy, size]`` for every resource entry reachable in *pe*.

    Walks the three nested levels under ``pe.DIRECTORY_ENTRY_RESOURCE`` and
    returns a list with one ``[entropy, size]`` pair per innermost entry.
    Extraction is best effort: if anything raises while walking, the pairs
    gathered so far are returned.
    """
    resources = []
    if not hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
        return resources

    try:
        for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
            if not hasattr(resource_type, 'directory'):
                continue
            for resource_id in resource_type.directory.entries:
                if not hasattr(resource_id, 'directory'):
                    continue
                for resource_lang in resource_id.directory.entries:
                    info = resource_lang.data.struct
                    blob = pe.get_data(info.OffsetToData, info.Size)
                    resources.append([get_entropy(blob), info.Size])
    except Exception:
        # Deliberate best effort: keep whatever was collected before the error.
        pass
    return resources
45 |
def get_version_info(pe):
    """Return the version information found in *pe* as a flat dict.

    The dict merges the ``StringFileInfo`` string tables, the first item of
    each ``VarFileInfo`` entry and, when present, selected fields of
    ``VS_FIXEDFILEINFO``.

    Raises:
        AttributeError: if *pe* has no ``FileInfo`` attribute (the caller
            relies on this to report "no version information").
    """
    def _as_text(value):
        # Keys may be bytes instead of str depending on the pefile version.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    # NOTE(review): newer pefile releases appear to nest the FileInfo entries
    # one list deeper; flatten a single level so both layouts are handled.
    entries = []
    for item in pe.FileInfo:
        if isinstance(item, (list, tuple)):
            entries.extend(item)
        else:
            entries.append(item)

    res = {}
    for fileinfo in entries:
        key = _as_text(fileinfo.Key)
        if key == 'StringFileInfo':
            for st in fileinfo.StringTable:
                for name, value in st.entries.items():
                    res[name] = value
        elif key == 'VarFileInfo':
            for var in fileinfo.Var:
                # dict.items() is a view on Python 3 and cannot be indexed
                # directly (the former `items()[0]` raised TypeError).
                items = list(var.entry.items())
                if items:
                    name, value = items[0]
                    res[name] = value

    if hasattr(pe, 'VS_FIXEDFILEINFO'):
        fixed = pe.VS_FIXEDFILEINFO
        # NOTE(review): may be a list of structures in newer pefile releases.
        if isinstance(fixed, (list, tuple)):
            fixed = fixed[0] if fixed else None
        if fixed is not None:
            res['flags'] = fixed.FileFlags
            res['os'] = fixed.FileOS
            res['type'] = fixed.FileType
            res['file_version'] = fixed.FileVersionLS
            res['product_version'] = fixed.ProductVersionLS
            res['signature'] = fixed.Signature
            res['struct_version'] = fixed.StrucVersion
    return res
66 |
def _mean_min_max(values):
    """Return ``(mean, min, max)`` of *values*, or ``(0, 0, 0)`` when empty."""
    if not values:
        return 0, 0, 0
    return sum(values) / float(len(values)), min(values), max(values)


def extract_infos(fpath):
    """Parse the PE file at *fpath* and return its features as a dict.

    The keys follow the column names of the training dataset (header fields,
    section / import / export / resource statistics, load-configuration size
    and version-information size).

    Raises whatever ``pefile.PE`` raises when *fpath* cannot be parsed.
    """
    res = {}
    pe = pefile.PE(fpath)

    # File header fields, copied under their own names.
    for name in ('Machine', 'SizeOfOptionalHeader', 'Characteristics'):
        res[name] = getattr(pe.FILE_HEADER, name)

    # Optional header fields, copied under their own names.
    optional_fields = (
        'MajorLinkerVersion', 'MinorLinkerVersion', 'SizeOfCode',
        'SizeOfInitializedData', 'SizeOfUninitializedData',
        'AddressOfEntryPoint', 'BaseOfCode', 'ImageBase', 'SectionAlignment',
        'FileAlignment', 'MajorOperatingSystemVersion',
        'MinorOperatingSystemVersion', 'MajorImageVersion',
        'MinorImageVersion', 'MajorSubsystemVersion', 'MinorSubsystemVersion',
        'SizeOfImage', 'SizeOfHeaders', 'CheckSum', 'Subsystem',
        'DllCharacteristics', 'SizeOfStackReserve', 'SizeOfStackCommit',
        'SizeOfHeapReserve', 'SizeOfHeapCommit', 'LoaderFlags',
        'NumberOfRvaAndSizes',
    )
    for name in optional_fields:
        res[name] = getattr(pe.OPTIONAL_HEADER, name)
    # BaseOfData is not always present on the optional header; default to 0.
    res['BaseOfData'] = getattr(pe.OPTIONAL_HEADER, 'BaseOfData', 0)

    # Sections (a file without sections yields zeros instead of a
    # ZeroDivisionError / ValueError).
    sections = pe.sections
    res['SectionsNb'] = len(sections)
    (res['SectionsMeanEntropy'],
     res['SectionsMinEntropy'],
     res['SectionsMaxEntropy']) = _mean_min_max([s.get_entropy() for s in sections])

    (res['SectionsMeanRawsize'],
     res['SectionsMinRawsize'],
     max_raw_size) = _mean_min_max([s.SizeOfRawData for s in sections])
    # The training data names this column 'SectionMaxRawsize' (no "s"); the
    # previous spelling is kept as an alias for backward compatibility.
    res['SectionMaxRawsize'] = max_raw_size
    res['SectionsMaxRawsize'] = max_raw_size

    (res['SectionsMeanVirtualsize'],
     res['SectionsMinVirtualsize'],
     res['SectionMaxVirtualsize']) = _mean_min_max([s.Misc_VirtualSize for s in sections])

    # Imports
    try:
        res['ImportsNbDLL'] = len(pe.DIRECTORY_ENTRY_IMPORT)
        imports = [imp for entry in pe.DIRECTORY_ENTRY_IMPORT for imp in entry.imports]
        res['ImportsNb'] = len(imports)
        res['ImportsNbOrdinal'] = len([imp for imp in imports if imp.name is None])
    except AttributeError:
        # No import directory
        res['ImportsNbDLL'] = 0
        res['ImportsNb'] = 0
        res['ImportsNbOrdinal'] = 0

    # Exports
    try:
        res['ExportNb'] = len(pe.DIRECTORY_ENTRY_EXPORT.symbols)
    except AttributeError:
        # No export
        res['ExportNb'] = 0

    # Resources: each item is an [entropy, size] pair.
    resources = get_resources(pe)
    res['ResourcesNb'] = len(resources)
    (res['ResourcesMeanEntropy'],
     res['ResourcesMinEntropy'],
     res['ResourcesMaxEntropy']) = _mean_min_max([r[0] for r in resources])
    (res['ResourcesMeanSize'],
     res['ResourcesMinSize'],
     res['ResourcesMaxSize']) = _mean_min_max([r[1] for r in resources])

    # Load configuration size
    try:
        res['LoadConfigurationSize'] = pe.DIRECTORY_ENTRY_LOAD_CONFIG.struct.Size
    except AttributeError:
        res['LoadConfigurationSize'] = 0

    # Version information size: number of version-info keys found.
    try:
        version_infos = get_version_info(pe)
        res['VersionInformationSize'] = len(version_infos.keys())
    except AttributeError:
        res['VersionInformationSize'] = 0
    return res
174 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Detect malicious files')
    parser.add_argument('FILE', help='File to be tested')
    args = parser.parse_args()

    # Both model files live in the "classifier" directory next to this script.
    # Resolving them from the script location (not the current working
    # directory) lets the detector be launched from anywhere.
    classifier_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'classifier'
    )

    # NOTE: joblib / pickle can execute arbitrary code while loading; only
    # load model files that come from a trusted source.
    clf = joblib.load(os.path.join(classifier_dir, 'classifier.pkl'))
    with open(os.path.join(classifier_dir, 'features.pkl'), 'rb') as f:
        features = pickle.load(f)

    # Build the feature vector in the order the classifier was trained with.
    data = extract_infos(args.FILE)
    pe_features = [data[name] for name in features]

    res = clf.predict([pe_features])[0]
    print('The file %s is %s' % (
        os.path.basename(args.FILE),
        ['malicious', 'legitimate'][res])
    )
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
ANTIVIRUSXML
6 |
7 |
8 | File Integrity Monitor with Malware detection using Machine learning
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | # :notebook_with_decorative_cover: Table of Contents
22 |
23 | - [About the Project](#star2-about-the-project)
24 | - [Getting Started](#toolbox-getting-started)
25 | * [Prerequisites](#bangbang-prerequisites)
26 | * [Installation](#gear-installation)
27 | - [Usage](#eyes-usage)
28 | - [Contact](#handshake-contact)
29 | - [Acknowledgements](#gem-acknowledgements)
30 |
31 |
32 |
33 | ## :star2: About the Project
34 |
35 | Overview
36 | ============
37 | This Python project combines two tools:
38 | 1. A basic file integrity monitor. It takes two arguments: a directory to scan and an output file for alerts. The script recursively scans the given directory and its subdirectories and creates alerts for any added, removed, or changed files. It uses the os, sys, and pickle libraries for file system operations, and the datetime, hashlib, logging, and time libraries for other operations.
39 | 2. Malware detection using machine learning. It trains a classifier to label [PE files](https://en.wikipedia.org/wiki/Portable_Executable) as either malicious or legitimate. It tries out 4 different classification algorithms (decision tree, random forest, AdaBoost and Gaussian naive Bayes) and keeps the one with the best test score for prediction.
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | ## :toolbox: Getting Started
55 |
56 |
57 | ### :bangbang: Prerequisites
58 |
59 | This project uses some libraries that you need to install first:
60 |
61 | ```bash
62 | pip install -r requirements.txt
63 | ```
64 |
65 |
66 | ### :gear: Installation
67 |
68 |
69 | ```bash
70 | git clone https://github.com/da4nyy/ANTIVIRUSxML/
71 | cd ANTIVIRUSxML
72 | ```
73 |
74 |
75 |
76 | ### :running: Run Locally
77 |
78 | Clone the project
79 |
80 |
81 | Go to the project directory
82 |
83 | ```bash
84 | cd ANTIVIRUSxML/
85 | ```
86 |
87 | Install dependencies
88 |
89 | ```bash
90 | pip install -r requirements.txt
91 | ```
92 |
93 | Train the model (you can skip this step: the trained files are already in the classifier directory)
94 |
95 | ```bash
96 | python3 Mal-detection-learning.py
97 | ```
98 |
99 | Start the file monitor and malware detection handler
100 |
101 | ```bash
102 | python3 antivirusXml.py --input <directory to monitor> --output <alert file>
103 | ```
104 |
105 |
106 | ## :eyes: Usage
107 |
108 | + You may monitor the integrity of the files that may have PII. In this case, you can place the script where your files live, and create a crontab or use task scheduler to run the script.
109 | + You can use the script to monitor the files stored in the web app and scan the added files.
110 | + If you are in the Blue Team at a CCDC competition, you can use this script to monitor your server and easily see which files were modified.
111 |
112 |
113 |
114 |
115 | ## :compass: Roadmap
116 |
117 | * [x] scan x32 PE files
118 | * [ ] scan x64 PE files
119 |
120 |
121 | ## :wave: Contributing
122 |
123 |
124 |
125 |
126 |
127 |
128 | Contributions are always welcome!
129 |
130 |
131 |
132 | ## :handshake: Contact
133 |
134 | kacem hakim - [@DARNY](https://twitter.com/darny74258511) - da4nyyy@proton.me
135 |
136 | Project Link: [https://github.com/](https://github.com/da4nyy/ANTIVIRUSxML)
137 |
138 |
139 | ## :gem: Acknowledgements
140 |
141 | - [kaggle mai dali](https://www.kaggle.com/code/maidaly/malware-detection-with-machine-learning)
142 | - [Te-k](https://github.com/Te-k)
143 | - [MaksimEkin- file integrity monitor](https://github.com/MaksimEkin/)
144 | - [Awesome README](https://github.com/matiassingers/awesome-readme)
145 |
146 |
147 |
148 |
--------------------------------------------------------------------------------
/antivirusXml.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 |
4 |
5 | """
6 | File Integrity Monitor with malware detection
7 |
8 | @author: DA4NY
9 | """
10 |
11 | # LIBRARIES NEEDED
12 | import subprocess
13 | import os
14 | import sys
15 | import pickle
16 | import datetime
17 | import hashlib
18 | import logging
19 | import time
20 | import signal
21 | from time import sleep
22 | import dictdiffer
23 | from progress.bar import Bar
24 |
25 | #==================
26 | #change here !
27 | #==================
28 |
29 |
30 | import sys
31 | import getopt
32 |
33 |
34 | import getopt
35 | import sys
36 |
def get_args(argv):
    """Parse the command line and return ``[input_directory, output_file]``.

    Args:
        argv: the full argument vector, program name first (``sys.argv``).

    Returns:
        A two-item list; an item is ``""`` when its option was not given.

    Exits with status 2 after printing the usage line when ``-h``/``--help``
    is given or the options cannot be parsed.
    """
    arg_input = ""
    arg_output = ""

    arg_help = "{0} -i <directory> -o <alert file>".format(argv[0])

    try:
        # "o:" -- the colon makes -o take a value, like the long form
        # "output=".  Without it the alert file given after -o was ignored.
        opts, args = getopt.getopt(argv[1:], "hi:o:", ["help", "input=", "output="])
    except getopt.GetoptError:
        print(arg_help)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(arg_help)
            sys.exit(2)
        elif opt in ("-i", "--input"):
            arg_input = arg
            print('input directory:', arg_input)
        elif opt in ("-o", "--output"):
            arg_output = arg
            print('output directory:', arg_output)

    return [arg_input, arg_output]
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 | #===============
77 | # Colors
78 | #===============
79 | # Normal
# ANSI escape sequences for the regular-weight foreground colours.
black = "\033[0;30m"
red = "\033[0;31m"
green = "\033[0;32m"
yellow = "\033[0;33m"
blue = "\033[0;34m"
purple = "\033[0;35m"
cyan = "\033[0;36m"
white = "\033[0;37m"

# Bold variants of the same colours.
bblack = "\033[1;30m"
bred = "\033[1;31m"
bgreen = "\033[1;32m"
byellow = "\033[1;33m"
bblue = "\033[1;34m"
bpurple = "\033[1;35m"
bcyan = "\033[1;36m"
bwhite = "\033[1;37m"
97 |
98 |
99 | #======================
100 | #printing Banner
101 | #======================
102 |
def banner():
    """Print the start-up banner: ASCII-art logo, feature list and author."""
    # The banner is built from adjacent triple-quoted literals joined with the
    # module-level colour codes (byellow, bblue, red, bwhite).  Everything
    # inside the literals, including line breaks, is printed verbatim.
    logo='''
'''+byellow+'''
'''''' mmmm mm mm mm mm m
'''''' # "m ## m"# #"m # "m m"
'''''' # # # # #" # # #m # "#"
'''''' # # #mm# #mmm#m # # # #
'''''' #mmm" # # # # ## #
''''''

'''+byellow+''' ++ File Integrity Monitor ith Malware Detection ++

'''+bblue+'''
*) Creates alerts for:
- added files
- removed files
- changed files

*) detect if the added / changed file is a malware

*) checks the signature of the file with virustotal api

'''+red+'''
@author: DA4NY

'''+bwhite+''''''
    print(logo)
130 |
131 |
132 |
133 | #==========================
134 | #Count all files in the directory and its subdirectories
135 | #we will use it in the progress bar
136 | #=========================
137 |
def count(SCAN_DIR):
    """Return the number of directories ``os.walk`` visits under *SCAN_DIR*.

    *SCAN_DIR* itself is included.  The value is used as the maximum of the
    progress bar in ``scan_files``, which advances once per visited directory.
    A path that does not exist yields 0.

    The ignore list does not influence how many directories are visited, so
    it is no longer consulted here; this also removes the dependency on a
    module-level ``list_to_ignore`` being defined before the call.
    """
    return sum(1 for _ in os.walk(SCAN_DIR))
150 |
151 |
152 | #*********************
153 | #scanning files
154 | #*********************
def scan_files(SCAN_DIR, list_to_ignore, LOG_FILE):
    """Map every directory under *SCAN_DIR* to the file names it contains.

    Args:
        SCAN_DIR: root directory to walk recursively.
        list_to_ignore: file names to leave out of every directory listing
            (may be empty or None).
        LOG_FILE: not used by this function.

    Returns:
        A dict ``{directory path: [file names]}``.  If anything raises, the
        error is reported through ``logging.exception`` and None is returned.
    """
    try:
        files = {}

        # One progress-bar step per directory visited.
        with Bar('Scanning Files ...', max=count(SCAN_DIR)) as bar:
            for dirName, subdirList, fileList in os.walk(SCAN_DIR):
                # Drop the monitor's own bookkeeping files from the listing.
                for ignored in (list_to_ignore or ()):
                    if ignored in fileList:
                        fileList.remove(ignored)

                files[str(dirName)] = fileList
                sleep(0.02)
                bar.next()

        return files

    except Exception as e:
        msg = "Error in scanning files and dirs !"
        logging.exception(msg)
183 |
184 |
185 |
186 |
187 | #storing hashes
188 |
def save_hash(dictionary, file, LOG_FILE):
    """Pickle *dictionary* into the file at path *file*.

    Args:
        dictionary: the object to store (the path -> hash mapping).
        file: destination path; it is overwritten.
        LOG_FILE: not used by this function.

    Errors are reported through ``logging.exception`` and not re-raised.
    """
    try:
        # The context manager flushes and closes the file; the previous code
        # referenced `close` without calling it.
        with open(file, "wb") as initial_scan_file:
            pickle.dump(dictionary, initial_scan_file)

    except Exception as e:
        msg = "Error while saving the dictionary"
        logging.exception(msg)
204 |
205 |
206 |
207 |
208 | # Load dictionary of hashes
209 |
def load_dict(file, LOG_FILE):
    """Load and return the pickled dictionary stored at path *file*.

    Args:
        file: path of the pickle file to read.
        LOG_FILE: log file that receives a message when loading fails.

    Returns:
        The unpickled object, or None when the file cannot be read or
        unpickled (the failure is written to *LOG_FILE*).
    """
    try:
        # NOTE: pickle can execute arbitrary code while loading; only read
        # files this tool wrote itself.
        # The context manager closes the file even when unpickling fails.
        with open(file, 'rb') as infile:
            return pickle.load(infile)

    except Exception as e:
        log(LOG_FILE, "Error while loading the dictionary")
        return None
227 |
228 |
229 |
230 |
231 | # Log events
232 |
def log(log_dir, message):
    """Append *message* and the current local time to the file *log_dir*.

    Args:
        log_dir: path of the log file (created when missing).
        message: any object; it is converted with ``str``.
    """
    currentDT = datetime.datetime.now()

    # The context manager closes the file; the previous code referenced
    # `close` without calling it.
    with open(log_dir, "a+") as file:
        file.write(str(message) +
                   " --- Time: " +
                   str(currentDT.strftime("%Y-%m-%d %H:%M:%S")) +
                   "\n")
245 |
246 |
def log_change(log_dir, message):
    """Append *message* with a timestamp to *log_dir* and print it in red.

    Args:
        log_dir: path of the alert file (created when missing).
        message: any object; it is converted with ``str`` for the file.
    """
    currentDT = datetime.datetime.now()

    # The context manager closes the file; the previous code referenced
    # `close` without calling it.
    with open(log_dir, "a+") as file:
        file.write(str(message) +
                   " --- Time: " +
                   str(currentDT.strftime("%Y-%m-%d %H:%M:%S")) +
                   "\n")
    print(red,message,white)
260 |
261 |
262 |
263 |
264 | # Take SHA256 of each file
265 | # hash is taken in blocks, this is done to ensure large files doens't fail
266 |
def calculate_hash(directory, LOG_FILE):
    """Return the SHA-256 hex digest of the file at path *directory*.

    The file is read in 4096-byte blocks so large files are never loaded
    whole.  If anything raises, a message is written to *LOG_FILE* and None
    is returned.
    """
    try:
        digest = hashlib.sha256()

        with open(directory, "rb") as stream:
            while True:
                block = stream.read(4096)
                if not block:
                    break
                digest.update(block)

        return digest.hexdigest()

    except Exception as e:
        log(LOG_FILE,"Error while taking the hash values")
285 |
286 |
287 |
288 |
289 | # integrity FUNCTION
290 |
def integrity():
    """Run the monitor: take a baseline scan, then compare against it forever.

    Uses the module-level settings SCAN_DIRECTORY, SCAN_STORAGE, LOG_FILE,
    ALERT_FILE and sleep_time_sc.  Every difference between the stored and the
    fresh scan is written to the alert file and passed to malware_detection;
    the fresh scan then becomes the stored one.  This function never returns.
    """
    print("DIRECTORY TO MONITOR :{} ".format(SCAN_DIRECTORY))

    # Baseline scan, persisted so that each later pass has something to diff.
    log(LOG_FILE, "Starting the initial scan...")
    baseline = scan()
    save_hash(baseline, SCAN_STORAGE, LOG_FILE)
    log(LOG_FILE, "Initial scan completed!")

    log(LOG_FILE, "Starting the integrity check...")
    while True:
        new_hash = scan()
        old_hash = load_dict(SCAN_STORAGE, LOG_FILE)

        # Alert on, and scan, everything that changed since the stored scan.
        changes = list(dictdiffer.diff(old_hash, new_hash))
        for diff in changes:
            log_change(ALERT_FILE, diff)
            malware_detection(diff)

        # The fresh scan is the reference for the next pass.
        save_hash(new_hash, SCAN_STORAGE, LOG_FILE)

        sleep(sleep_time_sc)
335 |
336 |
337 |
338 |
339 | # Scan the directory tree and take hash of the files
340 | # Return a dictionary of hashes and file paths
341 |
def scan():
    """Hash every file under SCAN_DIRECTORY.

    Returns:
        A dict mapping ``"<directory>/<file name>"`` to the value returned by
        calculate_hash for that file.
    """
    # {directory: [file names]} for the whole tree, minus the ignored names.
    directories = scan_files(SCAN_DIRECTORY, list_to_ignore, LOG_FILE)

    file_hashes = {}
    for path, files in directories.items():
        prefix = str(path) + "/"
        for name in files:
            full_path = prefix + str(name)
            file_hashes[full_path] = calculate_hash(full_path, LOG_FILE)

    return file_hashes
366 |
367 |
368 |
369 |
370 |
def malware_detection(diff):
    """Scan the file(s) named by one dictdiffer change record.

    Args:
        diff: one item yielded by ``dictdiffer.diff`` for the path -> hash
            dictionaries.  Assumed shapes (per dictdiffer's documentation):
            ``('add', location, [(path, hash), ...])``,
            ``('change', path_or_[path], (old_hash, new_hash))`` and
            ``('remove', location, [(path, hash), ...])``.

    Added and changed ``.exe`` files are passed to Mal-detection.py; any other
    added or changed file is sent to the VirusTotal helper.  Removed files are
    skipped because there is nothing left to scan.

    The record is read directly instead of re-parsing the last line of the
    alert log, which crashed on 'remove' records and mis-detected the
    extension for absolute paths and names containing several dots.
    """
    action, location, payload = diff
    if action == "add":
        targets = [path for path, _ in payload]
    elif action == "change":
        # Keys that contain "." are reported as a one-item list.
        if isinstance(location, (list, tuple)):
            targets = [location[0]] if location else []
        else:
            targets = [location]
    else:
        return

    detector = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'Mal-detection.py')

    for file_to_scan in targets:
        print(bgreen,"[+] Scanning {} ...".format(file_to_scan),bwhite)

        if os.path.splitext(str(file_to_scan))[1].lower() == ".exe":
            try:
                # Same interpreter as the monitor, detector next to this file.
                subprocess.call([sys.executable, detector, file_to_scan])
            except OSError:
                print(bred ,"[x] Failed to run the Malware detection !!!",bwhite )
        else:
            print(bred,"[x] The file isn't a windows executable !!! Currently we can only can windows x32 files !")
            print(bgreen,"[+] Trying the virus total api ... ",bwhite)
            api_virus_total(file_to_scan)
400 |
401 |
402 |
403 |
404 |
def api_virus_total(file):
    """Run ``virustotal.py -m <file>`` in a child ``python3`` process."""
    command = ['python3', 'virustotal.py', '-m', file]
    subprocess.call(command)
def hand_sign(signum, frame):
    """SIGINT handler: ask for confirmation and exit with status 1 on 'y'.

    Any other answer returns to the interrupted code.
    """
    res = input("Ctrl-c was pressed. Do you really want to exit? y/n :")
    if res != 'y':
        return
    print(red,"[x] Quitting!\n",bgreen,"[+] Saving the results in {} ".format(ALERT_FILE))
    exit(1)


# Route Ctrl-C through the confirmation prompt above.
signal.signal(signal.SIGINT, hand_sign)
414 |
415 | # execute
# execute
if __name__ == "__main__":
    # Command-line values; an empty string means "option not given".
    l = get_args(sys.argv)
    SCAN_DIRECTORY = l[0] if l[0] != "" else '.'
    ALERT_FILE = l[1] if l[1] != "" else 'alert.log'

    # Bookkeeping files written by the monitor; they are excluded from scans.
    SCAN_STORAGE = 'hashes.pkl'
    LOG_FILE = 'handler.log'
    list_to_ignore = [SCAN_STORAGE, LOG_FILE, ALERT_FILE]

    # Seconds to wait between two integrity passes.
    sleep_time_sc = 4

    print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))+"\n")

    # Starting the integrity monitor
    banner()
    integrity()
436 |
437 |
438 |
--------------------------------------------------------------------------------
/classifier/classifier.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/da4nyy/ANTIVIRUSxML/ff2d4b1cf1c0f71241b99e24505cba458fd81998/classifier/classifier.pkl
--------------------------------------------------------------------------------
/classifier/features.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/da4nyy/ANTIVIRUSxML/ff2d4b1cf1c0f71241b99e24505cba458fd81998/classifier/features.pkl
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | hashlib
2 | logging
3 | dictdiffer
4 | progress
5 |
--------------------------------------------------------------------------------
/virustotal.py:
--------------------------------------------------------------------------------
1 | # upload PE file to VirusTotal
2 | # then get info about the results
3 | # of analysis, print if malicious
4 | import os
5 | import sys
6 | import time
7 | import json
8 | import requests
9 | import argparse
10 | import hashlib
11 |
12 | # for terminal colors
class Colors:
    """ANSI escape sequences used to colour terminal output."""

    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    PURPLE = '\033[95m'
    # Resets the terminal to its default attributes.
    ENDC = '\033[0m'
20 |
# VirusTotal API key.  Taken from the VT_API_KEY environment variable when it
# is set, so the secret does not have to be written into the source file; the
# placeholder below remains the default and can still be edited in place.
VT_API_KEY = os.environ.get("VT_API_KEY", "< PUT UR KEY HERE >")

# VirusTotal API v3 URL
VT_API_URL = "https://www.virustotal.com/api/v3/"
26 |
27 | # upload malicious file to VirusTotal and analyse
class VTScan:
    """Upload a file to VirusTotal (API v3) and print the analysis results."""

    def __init__(self):
        # Headers sent with every request.
        self.headers = {
            "x-apikey" : VT_API_KEY,
            "User-Agent" : "vtscan v.1.0",
            "Accept-Encoding" : "gzip, deflate",
        }

    def _print_report(self, stats, results):
        """Print the detection counters and one section per malicious verdict.

        Args:
            stats: dict holding the "malicious" and "undetected" counters.
            results: dict mapping engine keys to that engine's verdict dict.
        """
        print (Colors.RED + "malicious: " + str(stats.get("malicious")) + Colors.ENDC)
        print (Colors.YELLOW + "undetected : " + str(stats.get("undetected")) + Colors.ENDC)
        print ()
        for verdict in results.values():
            if verdict.get("category") != "malicious":
                continue
            # str() keeps the report printable when a field is missing (None)
            # instead of raising TypeError on "text" + None.
            print ("==================================================")
            print (Colors.GREEN + str(verdict.get("engine_name")) + Colors.ENDC)
            print ("version : " + str(verdict.get("engine_version")))
            print ("category : " + str(verdict.get("category")))
            print ("result : " + Colors.RED + str(verdict.get("result")) + Colors.ENDC)
            print ("method : " + str(verdict.get("method")))
            print ("update : " + str(verdict.get("engine_update")))
            print ("==================================================")
            print ()
        print (Colors.GREEN + "successfully analyse: OK" + Colors.ENDC)

    def upload(self, malware_path):
        """Upload *malware_path* and remember the returned analysis id.

        Sets ``self.malware_path`` and, on success, ``self.file_id``.
        Exits the process when the server does not answer with status 200.
        """
        print (Colors.BLUE + "upload file: " + malware_path + "..." + Colors.ENDC)
        self.malware_path = malware_path
        upload_url = VT_API_URL + "files"
        # The context manager closes the uploaded file once the request is done.
        with open(os.path.abspath(malware_path), "rb") as handle:
            files = {"file" : (os.path.basename(malware_path), handle)}
            print (Colors.YELLOW + "upload to " + upload_url + Colors.ENDC)
            res = requests.post(upload_url, headers = self.headers, files = files)
        if res.status_code == 200:
            result = res.json()
            self.file_id = result.get("data").get("id")
            print (Colors.YELLOW + self.file_id + Colors.ENDC)
            print (Colors.GREEN + "successfully upload PE file: OK" + Colors.ENDC)
        else:
            print (Colors.RED + "failed to upload PE file :(" + Colors.ENDC)
            print (Colors.RED + "status code: " + str(res.status_code) + Colors.ENDC)
            sys.exit()

    def analyse(self):
        """Fetch the analysis of the uploaded file and print the outcome.

        A completed analysis is printed and the process exits.  A queued
        analysis falls back to looking the file up by its SHA-256 hash.
        Exits the process when the server does not answer with status 200.
        """
        print (Colors.BLUE + "get info about the results of analysis..." + Colors.ENDC)
        analysis_url = VT_API_URL + "analyses/" + self.file_id
        res = requests.get(analysis_url, headers = self.headers)
        if res.status_code == 200:
            result = res.json()
            attributes = result.get("data").get("attributes")
            status = attributes.get("status")
            if status == "completed":
                self._print_report(attributes.get("stats"), attributes.get("results"))
                sys.exit()
            elif status == "queued":
                print (Colors.BLUE + "status QUEUED..." + Colors.ENDC)
                with open(os.path.abspath(self.malware_path), "rb") as malware_path:
                    b = malware_path.read()
                    hashsum = hashlib.sha256(b).hexdigest()
                    self.info(hashsum)
        else:
            print (Colors.RED + "failed to get results of analysis :(" + Colors.ENDC)
            print (Colors.RED + "status code: " + str(res.status_code) + Colors.ENDC)
            sys.exit()

    def run(self, malware_path):
        """Upload *malware_path*, then fetch and print its analysis."""
        self.upload(malware_path)
        self.analyse()

    def info(self, file_hash):
        """Print the last known analysis of the file identified by *file_hash*.

        Exits the process after printing a report, or when the server does
        not answer with status 200.
        """
        print (Colors.BLUE + "get file info by ID: " + file_hash + Colors.ENDC)
        info_url = VT_API_URL + "files/" + file_hash
        res = requests.get(info_url, headers = self.headers)
        if res.status_code == 200:
            result = res.json()
            attributes = result.get("data").get("attributes")
            if attributes.get("last_analysis_results"):
                self._print_report(attributes.get("last_analysis_stats"),
                                   attributes.get("last_analysis_results"))
                sys.exit()
            else:
                print (Colors.BLUE + "failed to analyse :(..." + Colors.ENDC)

        else:
            print (Colors.RED + "failed to get information :(" + Colors.ENDC)
            print (Colors.RED + "status code: " + str(res.status_code) + Colors.ENDC)
            sys.exit()
def hand_sign(signum, frame):
    """Signal-handler style callback: ask for confirmation, exit on 'y'.

    Takes the ``(signum, frame)`` arguments of a signal handler.  Any answer
    other than 'y' returns to the caller.
    """
    res = input("Ctrl-c was pressed. Do you really want to exit? y/n ")
    if res == 'y':
        # `red` is not defined in this module; use the Colors palette instead.
        print(Colors.RED,"QUitting !")
        sys.exit(1)
if __name__ == "__main__":
    # Command line: -m/--mal <path of the file to upload and analyse>.
    cli = argparse.ArgumentParser()
    cli.add_argument('-m','--mal', required = True, help = "PE file path for scanning")
    args = vars(cli.parse_args())

    vtscan = VTScan()
    vtscan.run(args["mal"])
140 |
--------------------------------------------------------------------------------