├── README.md
└── truehunter.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Truehunter
 2 | The goal of Truehunter is to detect encrypted containers using a fast and memory efficient approach without any external dependencies for ease of portability. It was designed to detect Truecrypt and Veracrypt containers, however it may detect any encrypted file with a 'header' not included in its database.  
 3 |   
 4 | Truehunter performs the following checks:
 5 | 1. Test the first 8 bytes of the file against its own database.  
 6 | 2. File size modulo 64 must be zero.  
 7 | 3. Calculates file entropy.  
 8 |   
 9 | Truehunter is part of BlackArch forensic tools.  
10 | https://blackarch.org/forensic.html
11 | 
12 | ## Installation
13 | Any Python version from 2.7-3.7 should work, it does not need any additional libraries. 
14 |   
15 | ## Usage  
16 |   
17 | The headers database file will be created with the first use, and can be updated after every scan. Note this is not a correct header database, just the first 8 bytes of every file, extension and date(It does the job as a PoC).  
18 |   
19 | Fast Scan: Searchs for files with a size % 64 = 0 (block ciphers), unknown headers and appearing less than MAXHEADER value (default 3).  
20 | Default Scan: Performs a fast scan and calculates the entropy of the resulting files to reduce false positives.  
21 |   
22 | ```  
23 | usage: truehunter.py [-h] [-D HEADERSFILE] [-m MINSIZE] [-M MAXSIZE]  
24 |                      [-R MAXHEADER] [-f] [-o OUTPUTFILE]  
25 |                       LOCATION  
26 |   
27 | Checks for file size, unknown header, and entropy of files to determine if  
28 | they are encrypted containers.  
29 |   
30 | positional arguments:  
31 |   LOCATION              Drive or directory to scan.  
32 | 
33 | optional arguments:  
34 |   -h, --help            show this help message and exit.   
35 |   -D HEADERSFILE, --database HEADERSFILE  
36 |                         Headers database file, default headers.db  
37 |   -m MINSIZE, --minsize MINSIZE  
38 |                         Minimum file size in Kb, default 1Mb.  
39 |   -M MAXSIZE, --maxsize MAXSIZE  
40 |                         Maximum file size in Kb, default 100Mb.  
41 |   -R MAXHEADER, --repeatHeader MAXHEADER  
42 |                         Discard files with unknown headers repeated more than  
43 |                         N times, default 3.  
44 |   -f, --fast            Do not calculate entropy.  
45 |   -o OUTPUTFILE, --outputfile OUTPUTFILE  
46 |                         Scan results file name, default scan_results.csv
47 | ```
48 |   
49 | ## License: GPLv3
50 |   
51 | Truehunter  
52 | Author Andres Doreste  
53 | Copyright (C) 2015, Andres Doreste  
54 | License:   GPLv3  
55 | 


--------------------------------------------------------------------------------
/truehunter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # This tool has been created as a PoC.
  3 | 
  4 | import argparse
  5 | import csv
  6 | import hashlib
  7 | import math
  8 | import os
  9 | import sys
 10 | import shutil
 11 | import sqlite3
 12 | import sys
 13 | import time
 14 | from binascii import hexlify
 15 | from collections import Counter
 16 | 
 17 | class TrueHunter:
 18 |     def __init__(self, database, min_file_size, max_file_size, max_header_count, output_file):
 19 |         self.db = DbUtils(database)
 20 |         self.min_file_size = min_file_size
 21 |         self.max_file_size = max_file_size
 22 |         self.max_header_count = max_header_count
 23 |         self.repeated_headers = self.db.get_all_headers()
 24 |         self.first_check = []
 25 |         self.fast_scan_positives = []
 26 |         self.slow_scan_positives = []
 27 |         self.ignored_files = []  # Bigger than maxFileSize
 28 |         self.output_file = output_file
 29 |         self.full_scan_completed = False
 30 | 
 31 |     def fast_scan(self, location):
 32 |         # Step one, check size and read first 8 bytes
 33 |         for (path, subdir, files) in os.walk(location):
 34 |             for filename in files:
 35 |                 file_path = os.path.join(path, filename)
 36 |                 try:
 37 |                     file_size = os.path.getsize(file_path) / 1024
 38 |                     if (file_size % 64 == 0) and (file_size > self.min_file_size):
 39 |                         # Read first 8 bytes, not a real header.
 40 |                         header = hexlify(open(file_path, "rb").read(8)).decode("utf-8")
 41 |                         if header in self.repeated_headers:
 42 |                             self.repeated_headers[header][0] += 1
 43 |                         else:
 44 |                             self.repeated_headers[header] = [1, filename]
 45 |                         self.first_check.append([file_path, file_size, header])
 46 |                 except:
 47 |                     print("[!] Error reading {}".format(file_path))
 48 |         # Step two, check for header repetitions
 49 |         for (file_path, file_size, header) in self.first_check:
 50 |             if self.repeated_headers[header][0] <= self.max_header_count:
 51 |                 self.fast_scan_positives.append({"Path": file_path,
 52 |                                                 "File Size": file_size, 
 53 |                                                 "Header": header})
 54 | 
 55 |     def slow_scan(self):
 56 |         # Memory efficient entropy calculation
 57 |         for item in self.fast_scan_positives:
 58 |             file_path = item.get("Path")
 59 |             header = item.get("Header")
 60 |             file_size = os.path.getsize(file_path)
 61 |             entropy = 0.0
 62 |             hash_func = hashlib.md5()
 63 |             if (file_size / 1024) <= self.max_file_size:
 64 |                 hex_freq = {}
 65 |                 dec_freq = {}
 66 |                 with open(file_path, "rb") as f:
 67 |                     # Read chunks instead of mapping the whole file
 68 |                     while True:
 69 |                         data_chunk = f.read(65535)
 70 |                         if not data_chunk:
 71 |                             break
 72 |                         hash_func.update(data_chunk)
 73 |                         hex_freq = Counter(hex_freq) + (Counter(data_chunk))
 74 |                 # Transform (hex)byte values to (dec)byte and prepare counters for entropy calculation.
 75 |                 for byte in hex_freq:
 76 |                     dec_freq[byte] = float(hex_freq[byte]) / float(file_size)
 77 |                 # Entropy calculation.
 78 |                 for repetition in dec_freq.values():
 79 |                     if repetition > 0:
 80 |                         entropy -= repetition * math.log(repetition, 2)
 81 |                 if entropy > 7.998:
 82 |                     self.slow_scan_positives.append(
 83 |                         {"Path": file_path, 
 84 |                         "Entropy": entropy, "MD5 Hash": hash_func.hexdigest(),
 85 |                          "File Size": file_size, "Header": header})
 86 |             else:
 87 |                 self.ignored_files.append(
 88 |                     {"Path": file_path,
 89 |                     "Entropy": "Not calculated", 
 90 |                     "File Size": file_size, 
 91 |                     "Header": header})
 92 |         self.full_scan_completed = True
 93 | 
 94 |     def write_results(self):
 95 |         # Write results to a CSV file
 96 |         with open(self.output_file, 'w') as csvfile:
 97 |             field_names = ["Path", "Entropy", "MD5 Hash", "File Size", "Header"]
 98 |             writer = csv.DictWriter(csvfile, fieldnames=field_names, dialect=csv.excel)
 99 |             writer.writeheader()
100 |             if self.full_scan_completed:
101 |                 if len(self.slow_scan_positives) > 0:
102 |                     writer.writerows(self.slow_scan_positives)
103 |                 else:
104 |                     print("[!] No files detected.")
105 |                 if len(self.ignored_files) > 0:
106 |                     writer.writerows(self.ignored_files)
107 |                     print("[+] Manually check ignored files or repeat the scan increasing the maximum file size to " \
108 |                           "scan (-M, --maxsize). ")
109 |             else:
110 |                 if len(self.fast_scan_positives) > 0:
111 |                     writer.writerows(self.fast_scan_positives)
112 |                 else:
113 |                     print("[!] No files detected.")
114 | 
115 |     def add_repeated_headers(self):
116 |         # Update headers.db
117 |         headers = []
118 |         for item in self.repeated_headers:
119 |             # Only update if header repetition count is bigger than 10
120 |             if self.repeated_headers.get(item)[0] < 10:
121 |                 continue
122 |             try:
123 |                 extension = self.repeated_headers.get(item)[1][::-1].split('.')[0][::-1]
124 |             except:
125 |                 extension = ""
126 |             header = item
127 |             headers.append([header, extension])
128 |         self.db.update_db(headers)
129 | 
130 | 
131 | class DbUtils:
132 |     def __init__(self, database_file):
133 |         # Check if the db file exists.
134 |         if not os.path.isfile(database_file):
135 |             self.create_db(database_file)
136 |         else:
137 |             self.conn = sqlite3.connect(database_file)
138 |             self.c = self.conn.cursor()
139 | 
140 |     def create_db(self, database_file):
141 |         # Create Database
142 |         self.conn = sqlite3.connect(database_file)
143 |         self.c = self.conn.cursor()
144 |         # Create table
145 |         self.c.execute('''CREATE TABLE headers
146 |                      (header text, extension text, date text)''')
147 |         # Save (commit) the changes
148 |         self.conn.commit()
149 | 
150 |     def update_db(self, headers_array):
151 |         date = time.strftime("%d/%m/%Y")
152 |         # headers array must contain arrays ['header','extension']
153 |         for header, extension in headers_array:
154 |             data = (header, extension, date,)
155 |             if self.get_header(header) is not None:
156 |                 continue  # avoid adding repeated headers
157 |             else:
158 |                 self.c.execute('INSERT INTO headers VALUES (?,?,?)', data)
159 |         self.conn.commit()
160 | 
161 |     def get_header(self, header):
162 |         self.c.execute('SELECT * FROM headers WHERE header=?', (header,))
163 |         return self.c.fetchone()
164 | 
165 |     def get_all_headers(self):
166 |         self.c.execute('SELECT Header FROM headers')
167 |         x = self.c.fetchall()
168 |         headers = {}
169 |         if len(x) == 0:
170 |             return headers
171 |         for header in x:
172 |             headers[header[0]] = 0
173 |         return headers
174 | 
175 |     def close_db(self):
176 |         self.conn.commit()
177 |         self.conn.close()
178 | 
179 | 
180 | def update_db(th, database):
181 |     if sys.version_info.major > 2:
182 |         update = input("[?] Save repeated headers from this scan? [Y/N]")
183 |     else:
184 |         update = raw_input("[?] Save repeated headers from this scan? [Y/N]")
185 |     if update.lower() == "y":
186 |         try:
187 |             shutil.copyfile(database, "headers.db.bck")
188 |             print("[+] Database backup saved as headers.db.bck")
189 |         except IOError:
190 |             print("[!] Could not backup the existing database")
191 |             sys.exit(0)
192 |         th.add_repeated_headers()
193 |         print("[+] Database updated.")
194 |     sys.exit(0)
195 | 
196 | 
197 | def main():
198 |     description = """ _                   _                 _            
199 | | |                 | |               | |           
200 | | |_ _ __ _   _  ___| |__  _   _ _ __ | |_ ___ _ __ 
201 | | __| '__| | | |/ _ \ '_ \| | | | '_ \| __/ _ \ '__|
202 | | |_| |  | |_| |  __/ | | | |_| | | | | ||  __/ |   
203 |  \__|_|   \__,_|\___|_| |_|\__,_|_| |_|\__\___|_|   
204 | [+] Truehunter detects TrueCrypt containers and high entropy files (probably encrypted).\n[+] Autor: Andres Doreste\n[+] LinkedIn: https://www.linkedin.com/in/andres-doreste-239471136/\n[+] Notes: This project it's just a PoC\n"""
205 |     print(description)
206 | 
207 |     parser = argparse.ArgumentParser(
208 |         description="Checks for file size, unknown header, and entropy of files to determine if they are encrypted containers.")
209 |     parser.add_argument("LOCATION", help="Drive or directory to scan.")
210 |     parser.add_argument("-D", "--database", dest="headers_file", default="headers.db",
211 |                         help="Headers database file, default headers.db")
212 |     parser.add_argument("-m", "--minsize", dest="min_size", default=1024, type=int,
213 |                         help="Minimum file size in Kb, default 1Mb.")
214 |     parser.add_argument("-M", "--maxsize", dest="max_size", default=102400, type=int,
215 |                         help="Maximum file size in Kb, default 100Mb.")
216 |     parser.add_argument("-R", "--repeatHeader", dest="max_header", default=3, type=int,
217 |                         help="Discard files with unknown headers repeated more than N times, default 3.")
218 |     parser.add_argument("-f", "--fast", dest="fast_scan", action="store_true", help="Do not calculate entropy.")
219 |     parser.add_argument("-o", "--outputfile", dest="output_file", default="scan_results.csv",
220 |                         help="Scan results file name, default scan_results.csv")
221 |     args = parser.parse_args()
222 | 
223 |     if not os.path.exists(args.LOCATION):
224 |         print("[!] Could not read {}".format(args.LOCATION))
225 |         sys.exit(0)
226 | 
227 |     th = TrueHunter(args.headers_file, args.min_size, args.max_size, args.max_header, args.output_file)
228 |     start_time = time.time()
229 | 
230 |     print("[>] Starting fast scan, it shouldn't take too long...")
231 |     th.fast_scan(args.LOCATION)
232 | 
233 |     print("[+] {} files detected.".format(len(th.fast_scan_positives)))
234 |     print("[>] Done!")
235 | 
236 |     if args.fast_scan:
237 |         print("[!] Scan finished in {0:.2f} seconds.".format(time.time() - start_time))
238 |         th.write_results()
239 |         update_db(th, args.headers_file)
240 | 
241 |     print("[>] Starting entropy scan, staring at the screen won't help at this moment...")
242 |     th.slow_scan()
243 | 
244 |     print("[+] {} files detected.".format(len(th.slow_scan_positives)))
245 |     print("[!] {} files possible encrypted files ignored".format(len(th.ignored_files)))
246 |     th.write_results()
247 |     print("[+] Results saved in {}".format(args.output_file))
248 |     print("[>] Scan finished")
249 |     update_db(th, args.headers_file)
250 | 
251 | if __name__ == "__main__":
252 |     main()
253 | 


--------------------------------------------------------------------------------