├── README.md └── truehunter.py /README.md: -------------------------------------------------------------------------------- 1 | # Truehunter 2 | The goal of Truehunter is to detect encrypted containers using a fast and memory efficient approach without any external dependencies for ease of portability. It was designed to detect Truecrypt and Veracrypt containers, however it may detect any encrypted file with a 'header' not included in its database. 3 | 4 | Truehunter performs the following checks: 5 | 1. Test the first 8 bytes of the file against its own database. 6 | 2. File size modulo 64 must be zero. 7 | 3. Calculates file entropy. 8 | 9 | Truehunter is part of BlackArch forensic tools. 10 | https://blackarch.org/forensic.html 11 | 12 | ## Installation 13 | Any Python version from 2.7-3.7 should work, it does not need any additional libraries. 14 | 15 | ## Usage 16 | 17 | The headers database file will be created with the first use, and can be updated after every scan. Note this is not a correct header database, just the first 8 bytes of every file, extension and date(It does the job as a PoC). 18 | 19 | Fast Scan: Searchs for files with a size % 64 = 0 (block ciphers), unknown headers and appearing less than MAXHEADER value (default 3). 20 | Default Scan: Performs a fast scan and calculates the entropy of the resulting files to reduce false positives. 21 | 22 | ``` 23 | usage: truehunter.py [-h] [-D HEADERSFILE] [-m MINSIZE] [-M MAXSIZE] 24 | [-R MAXHEADER] [-f] [-o OUTPUTFILE] 25 | LOCATION 26 | 27 | Checks for file size, unknown header, and entropy of files to determine if 28 | they are encrypted containers. 29 | 30 | positional arguments: 31 | LOCATION Drive or directory to scan. 32 | 33 | optional arguments: 34 | -h, --help show this help message and exit. 35 | -D HEADERSFILE, --database HEADERSFILE 36 | Headers database file, default headers.db 37 | -m MINSIZE, --minsize MINSIZE 38 | Minimum file size in Kb, default 1Mb. 39 | -M MAXSIZE, --maxsize MAXSIZE 40 | Maximum file size in Kb, default 100Mb. 41 | -R MAXHEADER, --repeatHeader MAXHEADER 42 | Discard files with unknown headers repeated more than 43 | N times, default 3. 44 | -f, --fast Do not calculate entropy. 45 | -o OUTPUTFILE, --outputfile OUTPUTFILE 46 | Scan results file name, default scan_results.csv 47 | ``` 48 | 49 | ## License: GPLv3 50 | 51 | Truehunter 52 | Author Andres Doreste 53 | Copyright (C) 2015, Andres Doreste 54 | License: GPLv3 55 | -------------------------------------------------------------------------------- /truehunter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This tool has been created as a PoC. 3 | 4 | import argparse 5 | import csv 6 | import hashlib 7 | import math 8 | import os 9 | import sys 10 | import shutil 11 | import sqlite3 12 | import sys 13 | import time 14 | from binascii import hexlify 15 | from collections import Counter 16 | 17 | class TrueHunter: 18 | def __init__(self, database, min_file_size, max_file_size, max_header_count, output_file): 19 | self.db = DbUtils(database) 20 | self.min_file_size = min_file_size 21 | self.max_file_size = max_file_size 22 | self.max_header_count = max_header_count 23 | self.repeated_headers = self.db.get_all_headers() 24 | self.first_check = [] 25 | self.fast_scan_positives = [] 26 | self.slow_scan_positives = [] 27 | self.ignored_files = [] # Bigger than maxFileSize 28 | self.output_file = output_file 29 | self.full_scan_completed = False 30 | 31 | def fast_scan(self, location): 32 | # Step one, check size and read first 8 bytes 33 | for (path, subdir, files) in os.walk(location): 34 | for filename in files: 35 | file_path = os.path.join(path, filename) 36 | try: 37 | file_size = os.path.getsize(file_path) / 1024 38 | if (file_size % 64 == 0) and (file_size > self.min_file_size): 39 | # Read first 8 bytes, not a real header. 40 | header = hexlify(open(file_path, "rb").read(8)).decode("utf-8") 41 | if header in self.repeated_headers: 42 | self.repeated_headers[header][0] += 1 43 | else: 44 | self.repeated_headers[header] = [1, filename] 45 | self.first_check.append([file_path, file_size, header]) 46 | except: 47 | print("[!] Error reading {}".format(file_path)) 48 | # Step two, check for header repetitions 49 | for (file_path, file_size, header) in self.first_check: 50 | if self.repeated_headers[header][0] <= self.max_header_count: 51 | self.fast_scan_positives.append({"Path": file_path, 52 | "File Size": file_size, 53 | "Header": header}) 54 | 55 | def slow_scan(self): 56 | # Memory efficient entropy calculation 57 | for item in self.fast_scan_positives: 58 | file_path = item.get("Path") 59 | header = item.get("Header") 60 | file_size = os.path.getsize(file_path) 61 | entropy = 0.0 62 | hash_func = hashlib.md5() 63 | if (file_size / 1024) <= self.max_file_size: 64 | hex_freq = {} 65 | dec_freq = {} 66 | with open(file_path, "rb") as f: 67 | # Read chunks instead of mapping the whole file 68 | while True: 69 | data_chunk = f.read(65535) 70 | if not data_chunk: 71 | break 72 | hash_func.update(data_chunk) 73 | hex_freq = Counter(hex_freq) + (Counter(data_chunk)) 74 | # Transform (hex)byte values to (dec)byte and prepare counters for entropy calculation. 75 | for byte in hex_freq: 76 | dec_freq[byte] = float(hex_freq[byte]) / float(file_size) 77 | # Entropy calculation. 78 | for repetition in dec_freq.values(): 79 | if repetition > 0: 80 | entropy -= repetition * math.log(repetition, 2) 81 | if entropy > 7.998: 82 | self.slow_scan_positives.append( 83 | {"Path": file_path, 84 | "Entropy": entropy, "MD5 Hash": hash_func.hexdigest(), 85 | "File Size": file_size, "Header": header}) 86 | else: 87 | self.ignored_files.append( 88 | {"Path": file_path, 89 | "Entropy": "Not calculated", 90 | "File Size": file_size, 91 | "Header": header}) 92 | self.full_scan_completed = True 93 | 94 | def write_results(self): 95 | # Write results to a CSV file 96 | with open(self.output_file, 'w') as csvfile: 97 | field_names = ["Path", "Entropy", "MD5 Hash", "File Size", "Header"] 98 | writer = csv.DictWriter(csvfile, fieldnames=field_names, dialect=csv.excel) 99 | writer.writeheader() 100 | if self.full_scan_completed: 101 | if len(self.slow_scan_positives) > 0: 102 | writer.writerows(self.slow_scan_positives) 103 | else: 104 | print("[!] No files detected.") 105 | if len(self.ignored_files) > 0: 106 | writer.writerows(self.ignored_files) 107 | print("[+] Manually check ignored files or repeat the scan increasing the maximum file size to " \ 108 | "scan (-M, --maxsize). ") 109 | else: 110 | if len(self.fast_scan_positives) > 0: 111 | writer.writerows(self.fast_scan_positives) 112 | else: 113 | print("[!] No files detected.") 114 | 115 | def add_repeated_headers(self): 116 | # Update headers.db 117 | headers = [] 118 | for item in self.repeated_headers: 119 | # Only update if header repetition count is bigger than 10 120 | if self.repeated_headers.get(item)[0] < 10: 121 | continue 122 | try: 123 | extension = self.repeated_headers.get(item)[1][::-1].split('.')[0][::-1] 124 | except: 125 | extension = "" 126 | header = item 127 | headers.append([header, extension]) 128 | self.db.update_db(headers) 129 | 130 | 131 | class DbUtils: 132 | def __init__(self, database_file): 133 | # Check if the db file exists. 134 | if not os.path.isfile(database_file): 135 | self.create_db(database_file) 136 | else: 137 | self.conn = sqlite3.connect(database_file) 138 | self.c = self.conn.cursor() 139 | 140 | def create_db(self, database_file): 141 | # Create Database 142 | self.conn = sqlite3.connect(database_file) 143 | self.c = self.conn.cursor() 144 | # Create table 145 | self.c.execute('''CREATE TABLE headers 146 | (header text, extension text, date text)''') 147 | # Save (commit) the changes 148 | self.conn.commit() 149 | 150 | def update_db(self, headers_array): 151 | date = time.strftime("%d/%m/%Y") 152 | # headers array must contain arrays ['header','extension'] 153 | for header, extension in headers_array: 154 | data = (header, extension, date,) 155 | if self.get_header(header) is not None: 156 | continue # avoid adding repeated headers 157 | else: 158 | self.c.execute('INSERT INTO headers VALUES (?,?,?)', data) 159 | self.conn.commit() 160 | 161 | def get_header(self, header): 162 | self.c.execute('SELECT * FROM headers WHERE header=?', (header,)) 163 | return self.c.fetchone() 164 | 165 | def get_all_headers(self): 166 | self.c.execute('SELECT Header FROM headers') 167 | x = self.c.fetchall() 168 | headers = {} 169 | if len(x) == 0: 170 | return headers 171 | for header in x: 172 | headers[header[0]] = 0 173 | return headers 174 | 175 | def close_db(self): 176 | self.conn.commit() 177 | self.conn.close() 178 | 179 | 180 | def update_db(th, database): 181 | if sys.version_info.major > 2: 182 | update = input("[?] Save repeated headers from this scan? [Y/N]") 183 | else: 184 | update = raw_input("[?] Save repeated headers from this scan? [Y/N]") 185 | if update.lower() == "y": 186 | try: 187 | shutil.copyfile(database, "headers.db.bck") 188 | print("[+] Database backup saved as headers.db.bck") 189 | except IOError: 190 | print("[!] Could not backup the existing database") 191 | sys.exit(0) 192 | th.add_repeated_headers() 193 | print("[+] Database updated.") 194 | sys.exit(0) 195 | 196 | 197 | def main(): 198 | description = """ _ _ _ 199 | | | | | | | 200 | | |_ _ __ _ _ ___| |__ _ _ _ __ | |_ ___ _ __ 201 | | __| '__| | | |/ _ \ '_ \| | | | '_ \| __/ _ \ '__| 202 | | |_| | | |_| | __/ | | | |_| | | | | || __/ | 203 | \__|_| \__,_|\___|_| |_|\__,_|_| |_|\__\___|_| 204 | [+] Truehunter detects TrueCrypt containers and high entropy files (probably encrypted).\n[+] Autor: Andres Doreste\n[+] LinkedIn: https://www.linkedin.com/in/andres-doreste-239471136/\n[+] Notes: This project it's just a PoC\n""" 205 | print(description) 206 | 207 | parser = argparse.ArgumentParser( 208 | description="Checks for file size, unknown header, and entropy of files to determine if they are encrypted containers.") 209 | parser.add_argument("LOCATION", help="Drive or directory to scan.") 210 | parser.add_argument("-D", "--database", dest="headers_file", default="headers.db", 211 | help="Headers database file, default headers.db") 212 | parser.add_argument("-m", "--minsize", dest="min_size", default=1024, type=int, 213 | help="Minimum file size in Kb, default 1Mb.") 214 | parser.add_argument("-M", "--maxsize", dest="max_size", default=102400, type=int, 215 | help="Maximum file size in Kb, default 100Mb.") 216 | parser.add_argument("-R", "--repeatHeader", dest="max_header", default=3, type=int, 217 | help="Discard files with unknown headers repeated more than N times, default 3.") 218 | parser.add_argument("-f", "--fast", dest="fast_scan", action="store_true", help="Do not calculate entropy.") 219 | parser.add_argument("-o", "--outputfile", dest="output_file", default="scan_results.csv", 220 | help="Scan results file name, default scan_results.csv") 221 | args = parser.parse_args() 222 | 223 | if not os.path.exists(args.LOCATION): 224 | print("[!] Could not read {}".format(args.LOCATION)) 225 | sys.exit(0) 226 | 227 | th = TrueHunter(args.headers_file, args.min_size, args.max_size, args.max_header, args.output_file) 228 | start_time = time.time() 229 | 230 | print("[>] Starting fast scan, it shouldn't take too long...") 231 | th.fast_scan(args.LOCATION) 232 | 233 | print("[+] {} files detected.".format(len(th.fast_scan_positives))) 234 | print("[>] Done!") 235 | 236 | if args.fast_scan: 237 | print("[!] Scan finished in {0:.2f} seconds.".format(time.time() - start_time)) 238 | th.write_results() 239 | update_db(th, args.headers_file) 240 | 241 | print("[>] Starting entropy scan, staring at the screen won't help at this moment...") 242 | th.slow_scan() 243 | 244 | print("[+] {} files detected.".format(len(th.slow_scan_positives))) 245 | print("[!] {} files possible encrypted files ignored".format(len(th.ignored_files))) 246 | th.write_results() 247 | print("[+] Results saved in {}".format(args.output_file)) 248 | print("[>] Scan finished") 249 | update_db(th, args.headers_file) 250 | 251 | if __name__ == "__main__": 252 | main() 253 | --------------------------------------------------------------------------------