├── TODO.txt ├── lib ├── sandboxes.py ├── auxiliary.py ├── vt.py └── artifact.py ├── vti_search.py └── README.md /TODO.txt: -------------------------------------------------------------------------------- 1 | TODO: 2 | 3 | - consolidate queues, and refactor 4 | - check if sequential saving of report files (e.g., *.raw) leads to significant delays 5 | (potentially change this to asynchronous processing as well) 6 | - support processing livehunt notifications 7 | - support looking up URLs and domains when information is provided via a file (-f) rather than 8 | via a query ( - for intelligence searches, looking up domains and URLs is already supported) 9 | - support sort options for Intelligence searches 10 | -------------------------------------------------------------------------------- /lib/sandboxes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import os.path 5 | 6 | class Sandbox_Parser(): 7 | 8 | def __init__(self, options, report): 9 | """ 10 | :param report: A (collection of) sandbox report(s) in JSON format 11 | """ 12 | 13 | self.options = options 14 | self.report = report 15 | 16 | self.auxiliary = options["auxiliary"] 17 | 18 | 19 | def parse_report(self, sample, required_verbose_level = 1): 20 | """ Parses the (list of) sandbox report(s) that are defined in a dynamic analysis 21 | collection, and extracts the network indicators 22 | 23 | :param sample: A sample object 24 | """ 25 | 26 | traffic_objects = [] 27 | verbose_level = "INFO" if self.options["verbose"] >= required_verbose_level else "DEBUG" 28 | 29 | for sandbox in self.report: 30 | if "attributes" not in sandbox or "sandbox_name" not in sandbox["attributes"]: continue 31 | data = sandbox["attributes"] 32 | attributes = dir(sample) 33 | 34 | # extract unique network indicators across all sandbox reports 35 | if "ip_traffic" in data: 36 | traffic = data["ip_traffic"] 37 | for item in traffic: 38 | # only consider UDP or TCP connections 39 | if ("transport_layer_protocol" not in item) or (("transport_layer_protocol" in item) and (item["transport_layer_protocol"] not in ["UDP", "TCP"])): 40 | continue 41 | 42 | if "{0}:{1}".format(item["destination_ip"], item["destination_port"]) not in traffic_objects: 43 | if self.options["csv"]: 44 | line = "" 45 | for value in ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"]: 46 | if value not in attributes: 47 | line += self.options["separator"] 48 | continue 49 | 50 | if isinstance(getattr(sample, value), list): 51 | list_items = "" 52 | for list_item in getattr(sample, value): 53 | list_items += "{0}, ".format(list_item) 54 | line += "\"{0}\"{1}".format(list_items[:-2], self.options["separator"]) 55 | else: 56 | line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"]) 57 | 58 | for value in ["destination_ip", "destination_port", "url"]: 59 | line += "\"{0}\"{1}".format(item[value], self.options["separator"]) if (value in item) and (item[value] is not None) else "\"\"{0}".format(self.options["separator"]) 60 | self.options["csv_files"]["network"].write("{0}\n".format(line[:-1])) 61 | 62 | # TODO: Should we only add the host or host:port information? 63 | traffic_objects.append("{0}:{1}".format(item["destination_ip"], item["destination_port"])) 64 | 65 | # extract unique URLs across all sandbox reports 66 | if "http_conversations" in data: 67 | traffic = data["http_conversations"] 68 | for item in traffic: 69 | if item["url"] not in traffic_objects: 70 | if self.options["csv"]: 71 | line = "" 72 | for value in ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"]: 73 | if value not in attributes: 74 | line += self.options["separator"] 75 | continue 76 | 77 | if isinstance(getattr(sample, value), list): 78 | list_items = "" 79 | for list_item in getattr(sample, value): 80 | list_items += "{0}|".format(list_item) 81 | line += "\"{0}\"{1}".format(list_items[:-2], self.options["separator"]) 82 | else: 83 | line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"]) 84 | 85 | for value in ["destination_ip", "destination_port", "url"]: 86 | line += "\"{0}\"{1}".format(item[value], self.options["separator"]) if (value in item) and (item[value] is not None) else "\"\"{0}".format(self.options["separator"]) 87 | self.options["csv_files"]["network"].write("{0}\n".format(line[:-1])) 88 | 89 | traffic_objects.append(item["url"]) 90 | 91 | # if network indicators were extracted, write the information to an indicator report 92 | # (unless it is not existing already) 93 | filename = os.path.join(self.options["info_dir"], "{0}.ioc".format(sample.id)) 94 | if (len(traffic_objects) > 0) and (not os.path.exists(filename)): 95 | with open(filename, "a") as f: 96 | [ f.write("{0}\n".format(item)) for item in traffic_objects ] 97 | elif (len(traffic_objects) > 0) and (os.path.exists(filename)): 98 | self.options["auxiliary"].log("Network indicator report for sample already exists on disk: {0}".format(sample.id), level = "DEBUG") 99 | else: 100 | #self.options["auxiliary"].log("No network indicators found for sample: {0}".format(sample.id), level = "DEBUG") 101 | pass 102 | 103 | -------------------------------------------------------------------------------- /vti_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | import sys 5 | import argparse 6 | import os 7 | import os.path 8 | import asyncio 9 | from datetime import datetime 10 | from lib import auxiliary, vt 11 | 12 | meta = { 13 | "title" : "VTISearch - VirusTotal Intelligence Search", 14 | "note" : "Written by Stefan Voemel.", 15 | "version" : "0.1.6", 16 | } 17 | 18 | 19 | filenames = { 20 | # INFO: file, url, and domain identifiers represent the type of a VirusTotal object 21 | # do not change these identifiers 22 | "artifacts" : "artifacts.txt", 23 | "file" : "samples.csv", 24 | "url" : "urls.csv", 25 | "domain" : "domains.csv", 26 | "network" : "network_iocs.csv", 27 | } 28 | 29 | 30 | def get_header(): 31 | print("\n{0} - Version {1}\n\n{2}".format(meta["title"], meta["version"], meta["note"])) 32 | print("{0}\n".format("-" * 90)) 33 | 34 | 35 | async def main(): 36 | 37 | global options 38 | 39 | # get the full path of the program module 40 | module_name = os.path.abspath(__file__) 41 | module_path = os.path.dirname(module_name) 42 | 43 | opt = argparse.ArgumentParser(epilog = get_header()) 44 | 45 | opt.add_argument("-q", "--query", default="", dest="query", 46 | help="Run a VirusTotal Intelligence search query.") 47 | 48 | opt.add_argument("-l", "--limit", type=int, default=20, dest="limit", 49 | help="Limits the number of samples to return.") 50 | 51 | opt.add_argument("--logfile", type=str, default="log.txt", dest="log", 52 | help="Name of the log file.") 53 | 54 | opt.add_argument("--download-dir", type=str, default="", dest="download_dir", 55 | help="Name of the directory where retrieved information will be stored in.") 56 | 57 | opt.add_argument("-d", "--download", action="store_true", dest="download_samples", 58 | help="If set, also downloads samples from VirusTotal that are referenced in an Intelligence search.") 59 | 60 | opt.add_argument("-f", "--file", default="", dest="sample_file", 61 | help="Downloads samples that are referenced in a file.") 62 | 63 | opt.add_argument("--no-behavior", action="store_false", dest="download_behavior", 64 | help="If set, does not download behavior reports for samples.") 65 | 66 | opt.add_argument("-v", "--verbose", action="count", default=0, dest="verbose", 67 | help="If set, display verbose information about reports.\nUse -vvv to see detailed scan results.") 68 | opt.add_argument("-u", "--update-key", action="store_true", dest="update_api_key", 69 | help="If set, offers to enter a new API key.") 70 | 71 | opt.add_argument("-w", "--workers", type=int, default=5, dest="workers", 72 | help="Number of concurrent workers.") 73 | 74 | opt.add_argument("--csv", action="store_true", dest="csv", 75 | help="If set, display results as comma-separated values.") 76 | 77 | options = vars(opt.parse_args()) 78 | options["separator"] = "," 79 | options["filenames"] = filenames 80 | 81 | if (len(options["query"]) == 0) and (len(options["sample_file"]) == 0): 82 | print("Please either specify a VirusTotal Intelligence search query (-q) or a file with sample hashes (-f).\n") 83 | sys.exit(-1) 84 | 85 | # create a new directory based on the current timestamp that will store all query- and 86 | # download-related information 87 | if len(options["download_dir"]) == 0: 88 | timestamp = (datetime.now().timestamp()) 89 | timestamp = datetime.fromtimestamp(timestamp).strftime("%Y%m%d_%H%M") 90 | 91 | options["download_dir"] = os.path.join(module_path, "downloads", timestamp) 92 | 93 | options["csv_dir"] = os.path.join(options["download_dir"], "csv") 94 | options["info_dir"] = os.path.join(options["download_dir"], "reports") 95 | options["samples_dir"] = os.path.join(options["download_dir"], "samples") 96 | options["reports_dir"] = os.path.join(options["download_dir"], "behavior") 97 | options["log"] = os.path.join(options["download_dir"], options["log"]) 98 | 99 | # create directories if necessary 100 | created = True 101 | for directory in ["download_dir", "csv_dir", "info_dir", "samples_dir", "reports_dir"]: 102 | try: 103 | os.makedirs(options[directory]) 104 | except FileExistsError as err: 105 | pass 106 | except OSError as err: 107 | print("Error while creating directory: {0}".format(err)) 108 | created = False 109 | if not created: sys.exit(-1) 110 | 111 | helper = auxiliary.Auxiliary(options) 112 | options["auxiliary"] = helper 113 | 114 | # get / save API key from / to the system keyring 115 | options["virustotal"] = options["auxiliary"].process_api_key() 116 | 117 | # start interaction with the VirusTotal service 118 | virustotal = vt.VirusTotal_Search(options) 119 | 120 | start_time = datetime.now() 121 | tasks = [] 122 | # perform an Intelligence search (and download respective samples if indicated) 123 | if len(options["query"]) > 0: 124 | tasks.append(asyncio.create_task(virustotal.search())) 125 | 126 | # download samples that are referenced in a file 127 | if (len(options["sample_file"]) > 0) and (os.path.isfile(options["sample_file"])): 128 | if not options["download_samples"]: 129 | options["download_samples"] = True 130 | options["auxiliary"].log("Sample download is automatically enabled.\n", level = "WARNING") 131 | 132 | tasks.append(asyncio.create_task(virustotal.download_samples(options["sample_file"]))) 133 | 134 | await asyncio.gather(*tasks) 135 | for task in tasks: 136 | task.cancel() 137 | 138 | 139 | end_time = datetime.now() 140 | options["auxiliary"].log("\nInformation saved to {0}.".format(options["download_dir"])) 141 | options["auxiliary"].log("Operations completed in {0}.\n".format((end_time - start_time))) 142 | 143 | if options["csv"]: options["auxiliary"].close_csv_files() 144 | 145 | 146 | if __name__ == "__main__": 147 | 148 | # check for Python 3.7+ 149 | if (sys.version_info.major != 3) or ((sys.version_info.major == 3) and (sys.version_info.minor < 7)): 150 | print("Attention: Python 3.7 or higher is required for this program.\nPlease upgrade your Python instance.\n") 151 | sys.exit(-1) 152 | 153 | asyncio.run(main()) 154 | -------------------------------------------------------------------------------- /lib/auxiliary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import os.path 6 | import keyring 7 | from datetime import datetime 8 | import logging 9 | LOGGING_FORMAT = "[%(levelname)s]\t%(asctime)s - %(message)s" 10 | 11 | 12 | class Auxiliary(): 13 | 14 | def __init__(self, options): 15 | 16 | self.options = options 17 | self.logfile = self.init_logger(options["log"]) 18 | 19 | if self.options["csv"]: self.create_csv_files() 20 | 21 | 22 | def log(self, message, logger = None, level = "INFO"): 23 | if self.logfile == None: 24 | return 25 | 26 | if logger == None: 27 | logger = self.logfile 28 | 29 | if level.upper() == "INFO": 30 | logger.info(message) 31 | elif level.upper() == "WARNING": 32 | logger.warning(message) 33 | elif level.upper() == "ERROR": 34 | logger.error(message) 35 | elif level.upper() == "DEBUG": 36 | logger.debug(message) 37 | else: 38 | logger.info(message) 39 | 40 | 41 | def init_logger(self, logfile, formatting = "", write_mode = "w"): 42 | try: 43 | f = open(logfile, write_mode) 44 | f.close() 45 | except IOError: 46 | return None 47 | 48 | logger = logging.getLogger(logfile) 49 | logger.setLevel(logging.DEBUG) 50 | 51 | if formatting == "": 52 | formatting = LOGGING_FORMAT 53 | 54 | formatter = logging.Formatter(formatting) 55 | handler = logging.FileHandler(logfile) 56 | handler.setFormatter(formatter) 57 | handler.setLevel(logging.DEBUG) 58 | 59 | stream = logging.StreamHandler() 60 | stream.setLevel(logging.INFO) 61 | 62 | logger.addHandler(handler) 63 | logger.addHandler(stream) 64 | 65 | return logger 66 | 67 | 68 | def get_logger(self): 69 | return self.logfile 70 | 71 | 72 | def get_date(self): 73 | timestamp = (datetime.now().timestamp()) 74 | 75 | return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d") 76 | 77 | 78 | def convert_timestamp(self, timestamp, format = "%Y-%m-%d %H:%M:%S", output_format = "%Y-%m-%d"): 79 | try: 80 | return datetime.strptime(timestamp, format).strftime(output_format) 81 | except TypeError: 82 | return None 83 | 84 | 85 | def process_api_key(self): 86 | """ Reads the VirusTotal API key from the system keyring (virustotal -> api_key). 87 | If it not stored yet, the user is prompted to provide her key. 88 | 89 | :return: The API key as a string 90 | """ 91 | 92 | api_key = keyring.get_password('virustotal', 'api_key') 93 | message = "" 94 | if api_key is None: 95 | message = "VirusTotal API key is not yet stored in the system keyring." 96 | elif self.options["update_api_key"]: 97 | message = "The VirusTotal API key was requested to be updated." 98 | else: 99 | self.log("VirusTotal API key was read from the system keyring.", level = "DEBUG") 100 | return api_key 101 | 102 | self.log("{0}\nPlease note that you must specify an API key that is valid for the (commercial) Private API in order to fully use this program.\n".format(message), level = "WARNING") 103 | 104 | while True: 105 | try: 106 | key1 = input("Please enter the API key, or press Ctrl+C to abort: ") 107 | key2 = input("Please verify the API key, or press Ctrl+C to abort: ") 108 | 109 | if key1.strip("\n ") == key2.strip("\n "): 110 | api_key = key1.strip("\n ") 111 | keyring.set_password("virustotal", "api_key", api_key) 112 | self.log("VirusTotal API key was saved to the system keyring.", level = "DEBUG") 113 | return api_key 114 | except KeyboardInterrupt: 115 | self.log("\n\nAPI key not entered. Program aborted.\n") 116 | sys.exit(0) 117 | 118 | 119 | def create_csv_header(self, filename, fields): 120 | 121 | try: 122 | file_handle = open(filename, "w") 123 | 124 | line = "#" 125 | for field in fields: line += "{0}{1}".format(field, self.options["separator"]) 126 | file_handle.write("{0}\n".format(line[:-1])) 127 | 128 | return file_handle 129 | except IOError as err: 130 | self.options["auxiliary"].log("CSV file could not be created: {0}".format(filename), level = "ERROR") 131 | return None 132 | 133 | 134 | def create_csv_files(self): 135 | 136 | # saves a dictionary of file handles to CSV files 137 | self.options["csv_files"] = {} 138 | for item in self.options["filenames"]: 139 | filename = self.options["filenames"][item] 140 | if not filename.endswith(".csv"): continue 141 | 142 | fields = [] 143 | # define header fields for each artifact type 144 | if self.options["verbose"] < 3: 145 | if item == "file": 146 | fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "First submitted on", "Last submitted on", "Times submitted", "Benign", "Malicious", "Suspicious", "Undetected"] 147 | elif item == "domain": 148 | fields = ["Domain", "Registrar", "Tags", "Created on", "Last modified", "Last updated", "Benign", "Malicious", "Suspicious", "Undetected"] 149 | elif item == "url": 150 | fields = ["URL", "Final URL", "Title", "Tags", "First submitted on", "Last submitted on", "Times submitted", "Benign", "Malicious", "Suspicious", "Undetected"] 151 | else: 152 | if item == "file": 153 | fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "Vendor", "Signature", "Result", "Signature Database"] 154 | elif item == "domain": 155 | fields = ["Domain", "Registrar", "Tags", "Vendor", "Signature", "Result", "Signature Database"] 156 | elif item == "url": 157 | fields = ["URL", "Final URL", "Title", "Tags", "Vendor", "Signature", "Result", "Signature Database"] 158 | 159 | # network IOCs for a sample should be created regardless of the verbosity level 160 | if item == "network": 161 | fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "Host", "Port", "URL"] 162 | 163 | filename = os.path.join(self.options["csv_dir"], filename) 164 | file_handle = self.create_csv_header(filename, fields) 165 | self.options["csv_files"][item] = file_handle 166 | 167 | 168 | def close_csv_files(self): 169 | 170 | if "csv_files" not in self.options: return 171 | 172 | for filename in self.options["csv_files"]: 173 | if self.options["csv_files"][filename] is not None: 174 | self.options["csv_files"][filename].close() 175 | 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VTISearch - VirusTotal Intelligence Search 2 | 3 | *VTISearch* is a small utility for running a VirusTotal Intelligence search query. A query can include powerful search modifiers (listed in the [documentation](https://support.virustotal.com/hc/en-us/articles/360001385897-File-search-modifiers)) that permit efficient threat research and hunting operations. 4 | 5 | The program leverages v3 of the VirusTotal API. Please note that for Intelligence Search (and most other features of the program), you need a *private* API key, i.e., access to VirusTotal Enterprise. The API key is requested upon the first start and saved to the keyring of the system for security reasons. 6 | 7 | By default, *VTISearch* retrieves information about the first 20 samples that are associated with the search query. However, results for up to 300 samples can be requested as well with the help of the `-l` (`--limit`) parameter. 8 | 9 | Information includes the list of sample hashes (MD5, SHA1, SHA256, and - if existing - the VirusTotal *vhash* similarity hash), the type and size of the artifact, dates of (first and last) submission, and also detection statistics. 10 | 11 | Additional details, e.g., scanning results per vendor, can be displayed when speciying the verbose (`-v`) parameter. Up to three different verbosity levels are supported. 12 | 13 | *VTISearch* is capable of downloading the samples as well as behavioral (dynamic analysis) reports for an Intelligence search. Dynamic analysis reports are also automatically parsed in order to extract network-based Indicators of Compromise (IOCs). 14 | 15 | When using the `--csv` option, results can be exported in CSV format for subsequent import in, e.g., *Maltego* or other graph visualization programs. 16 | 17 | 18 | ## Features 19 | 20 | * Retrieves information for up to 300 artifacts (samples, domains, URLs) that are related to the search query. 21 | * Information includes meta data as well as detailed scanning and detection results upon request. 22 | * Supports the automatic download of associated samples and behavioral (dynamic analysis) reports. 23 | * Behavioral reports are automatically scanned for network-based Indicators of Compromise (IOCs). 24 | * Use of multiple workers to speed up operations. 25 | * All information is categorized in different sub-folders. Detailed logs facilitate post-processing. 26 | * Results can be exported in CSV format for subsequent relationship visualization with, e.g., Maltego. 27 | 28 | 29 | ## Requirements and Installation 30 | 31 | * Linux operating system (tested on Ubuntu 18.04) 32 | 33 | * Python 3.7+ 34 | * pip3 35 | * vt-py 36 | * keyring 37 | 38 | ### Notes on Python 3.7 and Ubuntu 18.04 39 | 40 | 1. By default, Python 3.6 is still installed on Ubuntu 18.04. You can install version 3.7 (or 3.8) with 41 | 42 | ```bash 43 | $ sudo apt-get install python3.7 44 | ``` 45 | 46 | and then change to the new version with\* 47 | 48 | ``` 49 | $ sudo update-alternatives --config python3 50 | ``` 51 | 52 | (\* In case you should get an error message that no alternative had been found, please run `sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1`.) 53 | 54 | If you subsequently run `python3 --version` you should see the new version. Please note that you might have to reinstall respective packages for this version. 55 | 56 | 57 | 2. Please reinstall pip3 with 58 | 59 | ``` 60 | $ sudo apt-get install --reinstall python3-pip 61 | ``` 62 | 63 | 64 | ### Package installation and Repository Cloning 65 | 66 | 1. Once Python 3.7 is avaialble, you can comfortably install all required packages as follows: 67 | 68 | ```bash 69 | $ sudo pip3 install vt-py keyring 70 | ``` 71 | 72 | (I am globally installing the packages in this example. Please feel free to set up a virtual environment instead if you prefer.) 73 | 74 | 75 | 2. Clone the *VTISearch* repository, and start the program: 76 | 77 | ``` 78 | $ git clone https://github.com/svo80/vti_search.git . 79 | 80 | $ cd vti_search && python3 vti_search -h 81 | ``` 82 | 83 | 84 | ## Options and Usage 85 | 86 | ``` 87 | usage: vti_search.py [-h] [-q QUERY] [-l LIMIT] [--logfile LOG] 88 | [--download-dir DOWNLOAD_DIR] [-d] [-f SAMPLE_FILE] 89 | [--no-behavior] [-v] [--csv] 90 | 91 | optional arguments: 92 | -h, --help Show this help message and exit 93 | 94 | -q QUERY, --query QUERY Run a VirusTotal Intelligence search query. 95 | 96 | -l LIMIT, --limit LIMIT Limits the number of samples to return. 97 | 98 | --logfile LOG Name of the log file. 99 | 100 | --download-dir DOWNLOAD_DIR Name of the directory where retrieved information will 101 | be stored in. 102 | 103 | -d, --download If set, also downloads samples from VirusTotal that 104 | are referenced in an Intelligence search. 105 | 106 | -f SAMPLE_FILE, --file SAMPLE_FILE Downloads samples that are referenced in a file. 107 | 108 | --no-behavior If set, does not download behavior reports for 109 | samples. 110 | 111 | -v, --verbose If set, display verbose information about reports. Use 112 | -vvv to see detailed scan results. 113 | 114 | -u, --update-key If set, offers to enter a new API key. 115 | 116 | -w, --workers WORKERS Number of concurrent workers. 117 | 118 | --csv If set, display results as comma-separated values. 119 | ``` 120 | 121 | In the majority of cases, *VTISearch* will be executed with the `-q` (`--query`) parameter. This query is sent to VirusTotal via the `v3` API. Respective samples will not be downloaded by default. However, this procedure can be easily activated with the `-d` parameter. 122 | 123 | ```bash 124 | $ python3 vti_search.py -q "evil.exe" -d 125 | ``` 126 | 127 | Rather than performing an Intelligence search, it is also possible processing a list of hashes that are stored in a file. As such, the program can be used as a quick sample downloader and IOC processor: 128 | 129 | ```bash 130 | $ python3 python3 vti_search.py -f ./iocs.txt 131 | ``` 132 | 133 | The approaches can also be mixed. For instance, you might want to first check the results of a query slightly more in detail, adapt the list of samples in scope, and then re-run the program with the download option enabled for the updated sample list. 134 | 135 | Alternatively, you might want to combine the results of an Intelligence search with indicators highlighted in a (third-party) report in order to create a more detailed overview of a specific campaign or operation. 136 | 137 | By default, all log files, samples, and reports are stored in a separate directory (identified by its timestamp) that is created at program startup in the `downloads` folder. If you prefer rather updating an existing directory, you can explicitly set the `--download-dir` parameter. 138 | 139 | For instance, assuming you would like to investigate an APT campaign, you can perform an Intelligence search, retrieve the first 100 results in detailed format, and store all information in a specific folder as follows: 140 | 141 | ```bash 142 | $ python3 vt_search.py -d -q -l 100 -vvv --download-dir=downloads/apt 143 | ``` 144 | 145 | ## Sample Queries and Intelligence Searches 146 | 147 | The following queries are solely for demonstration purposes to illustrate search capabilities and possible use cases for the program: 148 | 149 | 1. Show samples with detection statistics that were submitted after May 1, 2020 and were detected by more than five but less than 10 vendors. 150 | 151 | ```bash 152 | $ python3 vti_search.py -q "ls:2020-05-01+ positives:5+ positives:10-" -v --no-behavior 153 | ``` 154 | 155 | 156 | 2. Show PDF documents in German that were delivered as an email attachment and contain an embedded JavaScript. 157 | 158 | ``` 159 | $ python3 vti_search.py -q "tag:attachment type:pdf lang:german tag:js-embedded" 160 | ``` 161 | 162 | 163 | 3. Show signed executables with a size of less than 300KB that were detected by more than five vendors. 164 | 165 | ```bash 166 | $ python3 vti_search.py -q "size:300KB- positives:5+ tag:signed type:peexe" 167 | ``` 168 | 169 | 170 | 4. Show up to five samples, representing Microsoft Office documents that execute code upon opening and likely set an AutoRun key for persistence. 171 | 172 | ```bash 173 | $ python3 vti_search.py -q "behavior:'currentversion\run\' type:docx tag:auto-open" -l 5 174 | ``` 175 | 176 | 177 | ## Data Export and Collaboration 178 | 179 | *VTISearch* supports exporting all information in CSV format. Exported contents are dependent on the verbosity level. 180 | 181 | For instance, when specifying the `-vvv` parameter, detailed anti-virus scanning reports will be exported into CSV format. On the other hand, when solely specifying the `-v` parameter, higher level summary statistics will be created. 182 | 183 | The list of network indicators retrieved from dynamic analysis sandbox reports can be exported in CSV format as well. This information can subsequently be loaded with, e.g., [Maltego](https://www.maltego.com/) in order to visualize respective relationships. 184 | 185 | 186 | ## Example Run 187 | 188 | ```bash 189 | $ python3 vti_search.py -d -q evil.exe -l 10 -vv 190 | 191 | VTISearch - VirusTotal Intelligence Search - Version 0.1.0 192 | 193 | Written by Stefan Voemel. 194 | ------------------------------------------------------------------------------------------ 195 | 196 | 2axxxxxxxxxe4b2be454ed0dxxxxxxxxxx7db18e9780xxxxxxxx10dcabxxxxxx 197 | MD5: xxxxx09dxxxxxc271cxxxxx5cb6xxxxx 198 | Sha1: xxxxxx71bxxxxx4aaxxxx383xxxxce8xxxxe00xx 199 | VHash: xxx04xx5xdxx1xx8xxxx2txxxx 200 | 201 | Type: PE32 executable for MS Windows (GUI) Intel 80386 32-bit 202 | Type Tag: peexe 203 | Size: 73802 204 | 205 | First submission: 2020-05-07 11:16:58 206 | Last submission: 2020-05-07 11:16:58 207 | Number of submissions: 1 208 | Unique sources: 1 209 | 210 | Malicious: 58 211 | Suspicious: 0 212 | Undetected: 14 213 | 214 | [Host] 1xx.16.xxx.xxx:4444 215 | 216 | 798xxxx29xxxx4xxxe3dxxxa8xfxx3x2excxxxe7xxc4cxxxd4x4fx4x05xxxxxx 217 | MD5: xxxx27xxxx28xxxx14xxxb34xxx13xxx 218 | Sha1: xxxxb6xxx1f4xxxxdb26xxxx94xxxx5dxx61cxxx 219 | VHash: xxx03xxx7dxxx2xx 220 | 221 | Type: PE32 executable for MS Windows (console) Intel 80386 32-bit 222 | Type Tag: peexe 223 | Size: 4752 224 | 225 | First submission: 2011-07-04 22:00:08 226 | Last submission: 2020-05-06 13:39:21 227 | Number of submissions: 1951 228 | Unique sources: 1472 229 | 230 | Malicious: 58 231 | Suspicious: 0 232 | Undetected: 14 233 | 234 | [Host] 1xx.1xx.221.22:80 235 | [Host] 1xx.1xx.131.241:80 236 | [Host] 1xx.xxx.78.24:443 237 | [Host] 1xx.xxx.78.25:443 238 | [URL] hxxp://www.xxxxxxxx.com/ad.html 239 | ``` 240 | 241 | 242 | ## File Structure 243 | 244 | ```bash 245 | ├── downloads Program data 246 | │   └── 247 | │   ├── artifacts.txt List of artifacts that were in scope 248 | │   ├── behavior/ Directory for behavioral reports 249 | │   ├── csv Directory with CSV files 250 | │   │   ├── domains.csv Exported domains (if existing) 251 | │   │   ├── network_iocs.csv Exported network indicators 252 | │   │   ├── samples.csv Exported samples (if existing) 253 | │   │   └── urls.csv Exorted URLs (if existing) 254 | │   ├── log.txt Detailed log file with program runtime messages 255 | │   ├── reports/ Directory for summary reports and network indicators (*.ioc) 256 | │   │   ├── Textual summary report for a sample 257 | │   │   ├── Extracted network indicators for a sample 258 | │   │   ├── Static analysis report for a sample in JSON format 259 | │   ├── samples/ Directory for malware samples 260 | │   261 | ├── lib Program libraries 262 | │   ├── auxiliary.py 263 | │   ├── sandboxes.py 264 | │   └── vt.py 265 | │   266 | ├── README.md 267 | └── vti_search.py Main program file 268 | ``` 269 | 270 | 271 | ## Comments and Additional Notes 272 | 273 | I am not a professional developer or software engineer, and this program should be seen as a small helper tool. While I do enjoy periodically writing smaller utilities in my free time for Incident Response, malware analysis, and Threat Intelligence scenarios, I very rarely upload any of them. 274 | 275 | The only reason why I did so for this program is, because the number of alternatives for the v3 VirusTotal API is currently still very much limited. This being said, I spend the vast majority of my time (i.e., my professional life) with leading security teams and offering strategic advice and guidance on a higher level. As such, if you believe that the code is *\*, you are probably right. 276 | -------------------------------------------------------------------------------- /lib/vt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import vt 4 | import sys 5 | import os.path 6 | import json 7 | import re 8 | import requests 9 | import asyncio 10 | 11 | from .artifact import Artifact 12 | from .sandboxes import Sandbox_Parser 13 | 14 | 15 | class VirusTotal_Search(Artifact): 16 | """ Provides a class for running a VirusTotal Intelligence search and processing respective 17 | results. 18 | 19 | By default, at max 300 results are returned per query. 20 | """ 21 | 22 | def __init__(self, options): 23 | 24 | super().__init__(options) 25 | 26 | self.options = options 27 | self.auxiliary = options["auxiliary"] 28 | 29 | 30 | self.site = { 31 | 32 | "url" : "https://virustotal.com/api/v3/", 33 | "header" : { 34 | "x-apikey" : self.options["virustotal"] 35 | } 36 | } 37 | 38 | self.client = vt.Client(self.options["virustotal"]) 39 | 40 | # TODO: consolidate queues 41 | self.sample_queue = asyncio.Queue() 42 | self.behavior_queue = asyncio.Queue() 43 | self.info_queue = asyncio.Queue() 44 | 45 | 46 | async def search(self): 47 | """ Executes a VirusTotal Intelligence search 48 | """ 49 | 50 | async with vt.Client(self.options["virustotal"]) as client: 51 | self.options["auxiliary"].log("Running intelligence query: {0}".format(self.options["query"])) 52 | it = client.iterator('/intelligence/search', params={'query': self.options["query"]}, limit=self.options["limit"]) 53 | 54 | artifact_log = os.path.join(self.options["download_dir"], self.options["filenames"]["artifacts"]) 55 | 56 | tasks = [] 57 | asyncio.create_task(self.get_heartbeat()) 58 | with open(artifact_log, "w") as f: 59 | # iterate through the result set - each element represents a File object 60 | try: 61 | async for obj in it: 62 | if obj.type not in ["file", "url", "domain"]: 63 | self.options["auxiliary"].log("Warning: Unknown artifact type detected: {0} - {1:70}".format(obj.type, obj.id), level="WARNING") 64 | continue 65 | 66 | # log the name / identifier of the artifact 67 | if obj.type in ["file", "domain"]: 68 | f.write("{0}\n".format(obj.id)) 69 | elif obj.type == "url": 70 | f.write("{0} => {1}\n".format(obj.id, obj.url)) 71 | 72 | # for samples, request downloading the artifact and behavior report 73 | if obj.type == "file": 74 | if self.options["download_samples"] : await self.sample_queue.put(obj) 75 | if self.options["download_behavior"] : await self.behavior_queue.put(obj) 76 | 77 | # save the report summary 78 | sample_report = os.path.join(self.options["info_dir"], obj.id) 79 | super().display_information(obj, sample_report) 80 | except vt.error.APIError as err: 81 | 82 | if err.code in ["AuthenticationRequiredError", "ForbiddenError", "UserNotActiveError", "WrongCredentialsError"]: 83 | self.auxiliary.log("The API key is not valid for accessing the VirusTotal Private API, or there was a problem with the user account.", level = "ERROR") 84 | elif err.code in ["QuotaExceededError", "TooManyRequestsError"]: 85 | self.auxiliary.log("The quota for the API key or the number of issued requests has been exceeded.", level = "ERROR") 86 | else: 87 | self.auxiliary.log("There was an error while processing the request: {0}".format(err.code), level="ERROR") 88 | 89 | return None 90 | 91 | 92 | for worker in range(self.options["workers"]): 93 | if self.options["download_behavior"]: tasks.append(asyncio.create_task(self.get_behavior_report())) 94 | if self.options["download_samples"]: tasks.append(asyncio.create_task(self.get_sample())) 95 | 96 | await asyncio.gather(*tasks) 97 | await self.behavior_queue.join() 98 | await self.sample_queue.join() 99 | for task in tasks: task.cancel() 100 | 101 | 102 | async def download_samples(self, filename): 103 | """ Reads in a list of hashes from a file for subsequent sample download 104 | 105 | :param filename: The name of the file that contains the list of hashes 106 | """ 107 | 108 | md5 = re.compile(r"([a-fA-F\d]{32})") 109 | sha1 = re.compile(r"([a-fA-F\d]{40})") 110 | sha256 = re.compile(r"([a-fA-F\d]{64})") 111 | 112 | samples = [] 113 | asyncio.create_task(self.get_heartbeat()) 114 | with open(filename, "r") as f: 115 | for data in f: 116 | data = data.strip("\n ") 117 | if md5.match(data) or sha1.match(data) or sha256.match(data): 118 | # if the entry in the file represents a sample by hash, and the 119 | # sample is appearing for the first time, add it to the queue 120 | if data not in samples: 121 | await self.info_queue.put(data) 122 | samples.append(data) 123 | 124 | # retrieve summary information and check if the sample exists 125 | tasks = [] 126 | for worker in range(self.options["workers"]): 127 | result = tasks.append(asyncio.create_task(self.get_sample_info())) 128 | 129 | results = await asyncio.gather(*tasks) 130 | await self.info_queue.join() 131 | for task in tasks: task.cancel() 132 | 133 | # download artifacts that are existing as well as corresponding behavior reports 134 | for worker in results: 135 | for sample in worker: 136 | if sample is not None: 137 | if self.options["download_samples"] : await self.sample_queue.put(sample) 138 | if self.options["download_behavior"] : await self.behavior_queue.put(sample) 139 | 140 | tasks = [] 141 | for worker in range(self.options["workers"]): 142 | if self.options["download_behavior"]: tasks.append(asyncio.create_task(self.get_behavior_report())) 143 | if self.options["download_samples"]: tasks.append(asyncio.create_task(self.get_sample())) 144 | 145 | await asyncio.gather(*tasks) 146 | await self.behavior_queue.join() 147 | await self.sample_queue.join() 148 | for task in tasks: task.cancel() 149 | 150 | 151 | async def execute_request(self, request): 152 | """ Runs an asynchronous call to retreive a behavioral report from VirusTotal 153 | 154 | :param request: The API request to execute 155 | 156 | :return: JSON output that is contained in the 'data' field 157 | """ 158 | 159 | async with vt.Client(self.options["virustotal"]) as client: 160 | try: 161 | url = requests.compat.urljoin(self.site["url"], request) 162 | result = await client.get_json_async(url) 163 | 164 | if "data" not in result: 165 | raise ValueError("No valid JSON report received") 166 | 167 | return result["data"] 168 | except vt.error.APIError as err: 169 | return None 170 | except ValueError as err: 171 | self.options["auxiliary"].log("Behavior report for sample did not contain valid data: {0}".format(url)) 172 | return None 173 | 174 | 175 | async def get_heartbeat(self): 176 | """ Periodically print a status message of the queue to indicate the number of pending tasks 177 | """ 178 | 179 | while True: 180 | sys.stdout.write("\033[94m[Queue] Sample Reports: {0:03d} - Artifacts: {1:03d} - Behavior Reports: {2:03d}\033[0m\r".format(self.info_queue.qsize(), self.sample_queue.qsize(), self.behavior_queue.qsize())) 181 | sys.stdout.flush() 182 | await asyncio.sleep(1) 183 | 184 | 185 | async def get_sample_info(self): 186 | """ Retrieves summary information about a sample 187 | """ 188 | 189 | samples = [] 190 | async with vt.Client(self.options["virustotal"]) as client: 191 | while not self.info_queue.empty(): 192 | try: 193 | sample_id = await self.info_queue.get() 194 | path = os.path.join("/files", sample_id) 195 | 196 | # this call should be always performed to check if the sample exists 197 | # and get context information for a hash value 198 | result = await client.get_object_async(path) 199 | 200 | sample_report = os.path.join(self.options["info_dir"], sample_id) 201 | super().display_information(result, sample_report) 202 | 203 | samples.append(result) 204 | except vt.error.APIError as err: 205 | if err.code == "NotFoundError": 206 | self.options["auxiliary"].log("Sample was not found: {0}\n".format(sample_id), level = "WARNING") 207 | self.info_queue.task_done() 208 | continue 209 | elif err.code in ["AuthenticationRequiredError", "ForbiddenError", "UserNotActiveError", "WrongCredentialsError"]: 210 | self.auxiliary.log("The API key is not valid for accessing the VirusTotal Private API, or there was a problem with the user account.", level = "ERROR") 211 | elif err.code in ["QuotaExceededError", "TooManyRequestsError"]: 212 | self.auxiliary.log("The quota for the API key or the number of issued requests has been exceeded.", level = "ERROR") 213 | else: 214 | self.auxiliary.log("There was an error while processing the request: {0}".format(err.code), level="ERROR") 215 | 216 | # clear all remaining items in the queue 217 | while not self.info_queue.empty(): 218 | await self.info_queue.get() 219 | self.info_queue.task_done() 220 | 221 | self.info_queue.task_done() 222 | 223 | return samples 224 | 225 | 226 | async def get_behavior_report(self): 227 | """ Retrieves a behavior report from VirusTotal 228 | (The behavior report can consist of a result list from multiple sandboxes) 229 | 230 | :return: True if the report was successfully downloaded or was successfully 231 | read from disk (if existing), otherwise False 232 | """ 233 | 234 | async with vt.Client(self.options["virustotal"]) as client: 235 | while not self.behavior_queue.empty(): 236 | sample = await self.behavior_queue.get() 237 | sample_id = sample if isinstance(sample, str) else sample.id 238 | 239 | # check if a sample object rather than a hash was provided 240 | report_file = os.path.join(self.options["reports_dir"], sample_id) 241 | report_retrieved = False 242 | 243 | # if the report file is not on disk yet, it is downloaded 244 | if not os.path.isfile(report_file): 245 | url = 'files/{0}/behaviours'.format(sample_id) 246 | result = await self.execute_request(url) 247 | 248 | if result is None: 249 | self.options["auxiliary"].log("Sample does not have a behavior report, or the report could not be retrieved: {0}".format(sample_id), level="ERROR") 250 | self.behavior_queue.task_done() 251 | continue 252 | try: 253 | with open(report_file, "w") as f: 254 | json.dump(result, f) 255 | 256 | self.options["auxiliary"].log("Saved behaviorial report: {0}".format(report_file), level = "DEBUG") 257 | report_retrieved = True 258 | except IOError as err: 259 | self.options["auxiliary"].log("Error while saving behaviorial report: {0} - {1}".format(report_file, err), level = "ERROR") 260 | else: 261 | # the report has already been downloaded and is stored on disk 262 | self.options["auxiliary"].log("Behavior report for sample already exists on disk and is not downloaded again: {0}".format(sample_id), level = "DEBUG") 263 | 264 | try: 265 | with open(report_file, "r") as f: 266 | result = json.load(f) 267 | 268 | report_retrieved = True 269 | except (IOError, json.JSONDecodeError) as err: 270 | self.options["auxiliary"].log("Error while reading behaviorial report: {0} - {1}".format(report_file, err), level = "ERROR") 271 | 272 | if report_retrieved: 273 | sandbox = Sandbox_Parser(self.options, result) 274 | sandbox.parse_report(sample) 275 | 276 | self.behavior_queue.task_done() 277 | 278 | 279 | async def get_sample(self): 280 | """ Downloads a sample from VirusTotal 281 | 282 | :param sample_id: The id (hash value) of the sample 283 | 284 | :return: True if the sample was successfully downloaded, otherwise False 285 | (In case the sample already exists on disk, the return value 286 | is also False) 287 | """ 288 | 289 | async with vt.Client(self.options["virustotal"]) as client: 290 | while not self.sample_queue.empty(): 291 | try: 292 | sample_id = await self.sample_queue.get() 293 | # check if a sample object rather than a hash was provided 294 | if not isinstance(sample_id, str): sample_id = sample_id.id 295 | 296 | sample_path = os.path.join(self.options["samples_dir"], sample_id) 297 | 298 | # if the file is already on disk, it is not downloaded again 299 | # TODO: Possibly check more than purely the filename to be sure the content was previously 300 | # correctly downloaded as well? 301 | if os.path.isfile(sample_path): 302 | self.options["auxiliary"].log("Sample already exists on disk and is not downloaded again: {0}".format(sample_id), level = "DEBUG") 303 | self.sample_queue.task_done() 304 | continue 305 | 306 | # save the sample to disk 307 | with open(sample_path, "wb") as f: 308 | await client.download_file_async(sample_id, f) 309 | self.options["auxiliary"].log("Successfully downloaded sample: {0}".format(sample_id), level = "DEBUG") 310 | 311 | self.sample_queue.task_done() 312 | except IOError as err: 313 | self.options["auxiliary"].log("Error while downloading sample: {0}".format(err), level = "ERROR") 314 | self.sample_queue.task_done() 315 | -------------------------------------------------------------------------------- /lib/artifact.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os.path 4 | import json 5 | 6 | """ 7 | Super class for displaying information about an artifact and / or saving the information to 8 | an artifact report file on disk. 9 | 10 | 11 | Keywords and description: 12 | https://developers.virustotal.com/v3.0/reference#files 13 | 14 | - Hashes like md5, sha1 and sha256 that identifies it 15 | - size of the file 16 | - first_submission_date when the file was first received in VirusTotal (as a UNIX timestamp) 17 | - last_submission_date last time we received it (as a UNIX timestamp) 18 | - last_analysis_date last time we analysed it (as a UNIX timestamp) 19 | - last_modification_date last time the object itself was modified (as a UNIX timestamp) 20 | - times_submitted how many times VirusTotal had received it 21 | - last_analysis_results: result of the last analysis. 22 | 23 | dict with AV name as key and a dict with notes/result from that scanner as value. 24 | category: normalized result. can be: 25 | 26 | - "harmless" (AV thinks the file is not malicious), 27 | - "undetected" (AV has no opinion about this file), 28 | - "suspicious" (AV thinks the file is suspicious), 29 | - "malicious" (AV thinks the file is malicious). 30 | 31 | - names we have seen the file with, being meaningful_name the one we consider more interesting 32 | - unique_sources indicates from how many different sources the file has been received 33 | 34 | 35 | In the attributes dictionary you are going to find also fields with information extracted from the file itself. We characterise the file and expose this information in the following keys: 36 | 37 | - type_description describe the type of file it is, being type_tag it short and you can use to search files of the same kind. 38 | - creation_date is extracted when possible from the file and indicates the timestamp the compilation or build tool give to it when created, it can be also faked by malware creators. 39 | - total_votes received from the VirusTotal community, each time a user vote a file it is reflected in this values. reputation field is calculated from the votes the file received and the users reputations credits. 40 | - vhash an in-house similarity clustering algorithm value, based on a simple structural feature hash allows you to find similar files 41 | - tags are extracted from different parts of the report and are labels that help you search similar samples 42 | 43 | Additionally VirusTotal together with each Antivirus scan runs a set of tool that allows us to collect more information about the file. All this tool information is included in the "attributes" key, together with the rest of fields previously described. 44 | 45 | """ 46 | 47 | # Translation map for internal objects 48 | KEYWORD_MAP = { 49 | # file attributes 50 | "md5" : "MD5", 51 | "sha1" : "Sha1", 52 | "vhash" : "VHash", 53 | "first_submission_date" : "First submission", 54 | "last_submission_date" : "Last submission", 55 | "times_submitted" : "Number of submissions", 56 | "unique_sources" : "Unique sources", 57 | "size" : "Size", 58 | "type_tag" : "Type", 59 | "tags" : "Tag(s)", 60 | "magic" : "File description", 61 | 62 | # domain attributes 63 | "creation_date" : "Creation date", 64 | "last_modification_date": "Last modified", 65 | "last_update_date" : "Last updated", 66 | "registrar" : "Registrar", 67 | 68 | # url attributes 69 | "title" : "Title", 70 | "last_final_url" : "Final URL", 71 | 72 | # attributes for scan results 73 | "harmless" : "Benign", 74 | "suspicious" : "Suspicious", 75 | "malicious" : "Malicious", 76 | "undetected" : "Undetected", 77 | "failure" : "Failure", 78 | "type-unsupported" : "Unsupported", 79 | } 80 | 81 | 82 | class Artifact(): 83 | """ Provides a class for running a VirusTotal Intelligence search and processing respective 84 | results. 85 | 86 | By default, at max 300 results are returned per query. 87 | """ 88 | 89 | def __init__(self, options): 90 | 91 | self.options = options 92 | self.auxiliary = options["auxiliary"] 93 | 94 | 95 | def display_scanning_results(self, sample, required_verbose_level = 0, file_handle = None): 96 | """ Displays scanning results per anti-virus vendor 97 | 98 | :param sample: The sample object 99 | :param required_verbose_level: Displays results on screen if the verbose level 100 | is high enough, otherwise only logs results to a file 101 | :param file_handle: If set, writes information to an artifact report file 102 | """ 103 | 104 | results = sample.last_analysis_results 105 | for item in results: 106 | engine = results[item] 107 | 108 | # category can be, e.g., suspicious, malicious, undetected, etc. 109 | category = KEYWORD_MAP[engine["category"]] if engine["category"] in KEYWORD_MAP else engine["category"] 110 | signature = engine["result"] if engine["result"] is not None else "--" 111 | if len(signature) > 40: signature = "{0} (...)".format(signature[:40]) 112 | 113 | if "engine_update" in engine and engine["engine_update"] is not None: 114 | signature_database = engine["engine_update"] 115 | else: 116 | signature_database = "--" 117 | 118 | string = "{0}{1:28}{2:47}{3:25}(Signature Database: {4})".format(" " * 2, engine["engine_name"], signature, category, signature_database) 119 | if self.options["verbose"] >= required_verbose_level: print(string) 120 | if file_handle is not None: file_handle.write("{0}\n".format(string)) 121 | 122 | if self.options["csv"] and self.options["verbose"] >= 3: 123 | line = "" 124 | attributes = dir(sample) 125 | 126 | if sample.type == "file": 127 | fields = ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"] 128 | elif sample.type == "domain": 129 | fields = ["id", "registrar", "tags"] 130 | elif sample.type == "url": 131 | fields = ["url", "last_final_url", "title", "tags"] 132 | else: 133 | fields = [] 134 | 135 | for value in fields: 136 | if value not in attributes: 137 | line += self.options["separator"] 138 | continue 139 | 140 | if isinstance(getattr(sample, value), list): 141 | list_items = "" 142 | for item in getattr(sample, value): 143 | list_items += "{0}|".format(item) 144 | line += "\"{0}\"{1}".format(list_items[:-1], self.options["separator"]) 145 | else: 146 | line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"]) 147 | for value in ["engine_name", "result", "category", "engine_update"]: 148 | 149 | if value in engine and engine[value] is not None: 150 | line += "\"{0}\"{1}".format(engine[value], self.options["separator"]) 151 | else: 152 | line += "\"\"{0}".format(self.options["separator"]) 153 | 154 | self.options["csv_files"][sample.type].write("{0}\n".format(line[:-1])) 155 | 156 | if self.options["verbose"] >= required_verbose_level: print() 157 | if file_handle is not None: file_handle.write("\n") 158 | 159 | 160 | def display_values(self, id_list, sample, filter_values = None, required_verbose_level = 0, file_handle = None): 161 | """ 162 | :param id_list: List of attributes that should be processed 163 | :param sample: The sample object 164 | :param filter_values: White list of values that should be exclusively considered 165 | when parsing an attribute list 166 | :param required_verbose_level: Displays results on screen if the verbose level 167 | is high enough, otherwise only logs results to a file 168 | :param file_handle: If set, writes information to an artifact report file 169 | """ 170 | 171 | for value in id_list: 172 | if value not in dir(sample): continue 173 | 174 | if isinstance(getattr(sample, value), dict): 175 | for item in getattr(sample, value): 176 | if filter_values is not None and isinstance(filter_values, list): 177 | if item not in filter_values: continue 178 | 179 | label = KEYWORD_MAP[item] if item in KEYWORD_MAP else item 180 | 181 | string = "{0}{1:28}{2}".format(" " * 2, label + ":", getattr(sample, value)[item]) 182 | if self.options["verbose"] >= required_verbose_level: print(string) 183 | if file_handle is not None: file_handle.write("{0}\n".format(string)) 184 | elif isinstance(getattr(sample, value), list): 185 | line = "" 186 | for item in getattr(sample, value): 187 | line += "{0}, ".format(item) 188 | label = KEYWORD_MAP[value] if value in KEYWORD_MAP else value 189 | 190 | string = "{0}{1:28}{2}".format(" " * 2, label + ":", line[:-2]) 191 | if self.options["verbose"] >= required_verbose_level: print(string) 192 | if file_handle is not None: file_handle.write("{0}\n".format(string)) 193 | else: 194 | label = KEYWORD_MAP[value] if value in KEYWORD_MAP else value 195 | string = "{0}{1:28}{2}".format(" " * 2, label + ":", getattr(sample, value)) 196 | if self.options["verbose"] >= required_verbose_level: print(string) 197 | if file_handle is not None: file_handle.write("{0}\n".format(string)) 198 | 199 | if self.options["verbose"] >= required_verbose_level: print("") 200 | if file_handle is not None: file_handle.write("\n") 201 | 202 | 203 | def display_information(self, sample, filename = None): 204 | """ 205 | Displays information about an artifact that was returned as part of a search query. 206 | Displayed information is dependent on the artifact type. 207 | 208 | :param sample: Sample object (type: file, domain, url) 209 | :param filename: Name of a report file 210 | """ 211 | 212 | identifier = "" 213 | if sample.type in ["file", "domain"]: 214 | # INFO: For domains, the identifier is the domain name 215 | # This appears to be okay, as for unicode characters an internationalized domain 216 | # name is returned which should not cause any conflict with the file system level 217 | # TODO: check this with dedicated tests 218 | identifier = sample.id 219 | elif sample.type == "url": 220 | identifier = sample.url 221 | else: 222 | self.options["auxiliary"].log("Unknown sample type detected: {0} - {1}".format(sample.type, sample.id), level="WARNING") 223 | print("{0:80}".format(identifier)) 224 | 225 | # write the summary information to disk if a filename was provided and the report 226 | # does not exist yet, otherwise only log but do not rewrite 227 | file_handle = None 228 | if (filename is not None) and (not os.path.exists(filename)): 229 | file_handle = open(filename, "w") 230 | file_handle.write("{0}\n".format(identifier)) 231 | elif (filename is not None) and (os.path.exists(filename)): 232 | self.options["auxiliary"].log("Summary report for the sample already exists on disk and is not downloaded again: {0}".format(sample.id), level = "DEBUG") 233 | 234 | # write the raw report to disk if a filename was provided and the report 235 | # does not exist yet, otherwise only log but do not rewrite 236 | raw_filename = "{0}.raw".format(filename) 237 | if (filename is not None) and (not os.path.exists(raw_filename)): 238 | try: 239 | with open(raw_filename, "w") as f: 240 | json.dump(sample.to_dict(), f) 241 | except (IOError, TypeError) as err: 242 | self.options["auxiliary"].log("There was an error while saving the raw report to disk for sample: {0} - {1}".format(sample.id, err), level="ERROR") 243 | elif (filename is not None) and (os.path.exists(raw_filename)): 244 | self.options["auxiliary"].log("The raw report for the sample already exists on disk and is not downloaded again: {0}".format(sample.id), level = "DEBUG") 245 | 246 | 247 | if self.options["csv"] and self.options["verbose"] < 3: 248 | line = "" 249 | attributes = dir(sample) 250 | 251 | # determine output fields by artifact type 252 | fields = [] 253 | if sample.type == "file": 254 | fields = ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags", "first_submission_date", "last_submission_date", "times_submitted"] 255 | elif sample.type == "domain": 256 | fields = ["id", "registrar", "tags", "creation_date", "last_modification_date", "last_update_date"] 257 | elif sample.type == "url": 258 | fields = ["url", "last_final_url", "title", "tags", "first_submission_date", "last_submission_date", "times_submitted"] 259 | else: 260 | fields = [] 261 | 262 | for value in fields: 263 | if value not in attributes: 264 | line += self.options["separator"] 265 | continue 266 | 267 | if isinstance(getattr(sample, value), list): 268 | list_items = "" 269 | for item in getattr(sample, value): 270 | list_items += "{0}|".format(item) 271 | line += "\"{0}\"{1}".format(list_items[:-1], self.options["separator"]) 272 | else: 273 | line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"]) 274 | 275 | for value in ["harmless", "malicious", "suspicious", "undetected"]: 276 | if (("last_analysis_stats" in attributes) and (value in sample.last_analysis_stats.keys())): 277 | line += "\"{0}\"{1}".format(sample.last_analysis_stats[value], self.options["separator"]) 278 | else: 279 | line += "\"{0}\"".format(self.options["separator"]) 280 | 281 | self.options["csv_files"][sample.type].write("{0}\n".format(line[:-1])) 282 | 283 | # verbose level 1 284 | if sample.type == "file": 285 | values = ["md5", "sha1", "vhash"] 286 | elif sample.type == "domain": 287 | values = ["creation_date", "last_modification_date", "last_update_date"] 288 | elif sample.type == "url": 289 | values = ["last_final_url", "title"] 290 | else: 291 | values = [] 292 | self.display_values(values, sample, required_verbose_level = 1, file_handle = file_handle) 293 | 294 | values = ["magic", "type_tag", "tags", "size"] 295 | self.display_values(values, sample, required_verbose_level = 1, file_handle = file_handle) 296 | 297 | # verbose level 2 298 | if sample.type in ["file", "url"]: 299 | values = ["first_submission_date", "last_submission_date", "times_submitted", "unique_sources"] 300 | elif sample.type == "domain": 301 | values = ["registrar"] 302 | else: 303 | values = [] 304 | self.display_values(values, sample, required_verbose_level = 2, file_handle = file_handle) 305 | 306 | values = ["last_analysis_stats"] 307 | self.display_values(values, sample, ["harmless", "malicious", "suspicious", "undetected"], required_verbose_level = 1, file_handle = file_handle) 308 | 309 | # verbose level 3 310 | self.display_scanning_results(sample, required_verbose_level = 3, file_handle = file_handle) 311 | 312 | if file_handle is not None: 313 | file_handle.close() 314 | self.options["auxiliary"].log("Saved summary report: {0}".format(filename), level = "DEBUG") 315 | 316 | 317 | 318 | --------------------------------------------------------------------------------