├── TODO.txt
├── lib
    ├── sandboxes.py
    ├── auxiliary.py
    ├── vt.py
    └── artifact.py
├── vti_search.py
└── README.md


/TODO.txt:
--------------------------------------------------------------------------------
 1 | TODO:
 2 | 
 3 | - consolidate queues, and refactor
 4 | - check if sequential saving of report files (e.g., *.raw) leads to significant delays
 5 |   (potentially change this to asynchronous processing as well)
 6 | - support processing livehunt notifications
 7 | - support looking up URLs and domains when information is provided via a file (-f) rather than
 8 |   via a query ( - for intelligence searches, looking up domains and URLs is already supported)
 9 | - support sort options for Intelligence searches
10 | 


--------------------------------------------------------------------------------
/lib/sandboxes.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import sys
  4 | import os.path
  5 | 
  6 | class Sandbox_Parser():
  7 | 
  8 |         def __init__(self, options, report):
  9 |             """ 
 10 |                 :param report: A (collection of) sandbox report(s) in JSON format
 11 |             """
 12 | 
 13 |             self.options = options
 14 |             self.report = report
 15 | 
 16 |             self.auxiliary = options["auxiliary"]
 17 | 
 18 | 
 19 |         def parse_report(self, sample, required_verbose_level = 1):
 20 |             """ Parses the (list of) sandbox report(s) that are defined in a dynamic analysis 
 21 |                 collection, and extracts the network indicators
 22 | 
 23 |                 :param sample: A sample object
 24 |             """
 25 | 
 26 |             traffic_objects = []
 27 |             verbose_level = "INFO" if self.options["verbose"] >= required_verbose_level else "DEBUG"
 28 |             
 29 |             for sandbox in self.report:
 30 |                 if "attributes" not in sandbox or "sandbox_name" not in sandbox["attributes"]: continue
 31 |                 data = sandbox["attributes"]
 32 |                 attributes = dir(sample)
 33 |                 
 34 |                 # extract unique network indicators across all sandbox reports
 35 |                 if "ip_traffic" in data:
 36 |                     traffic = data["ip_traffic"]
 37 |                     for item in traffic:
 38 |                         # only consider UDP or TCP connections
 39 |                         if ("transport_layer_protocol" not in item) or (("transport_layer_protocol" in item) and (item["transport_layer_protocol"] not in ["UDP", "TCP"])):
 40 |                             continue
 41 | 
 42 |                         if "{0}:{1}".format(item["destination_ip"], item["destination_port"]) not in traffic_objects:
 43 |                             if self.options["csv"]:
 44 |                                 line = ""
 45 |                                 for value in ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"]:
 46 |                                     if value not in attributes:
 47 |                                         line += self.options["separator"]
 48 |                                         continue
 49 |                                     
 50 |                                     if isinstance(getattr(sample, value), list):
 51 |                                         list_items = ""
 52 |                                         for list_item in getattr(sample, value):
 53 |                                             list_items += "{0}, ".format(list_item)
 54 |                                         line += "\"{0}\"{1}".format(list_items[:-2], self.options["separator"])
 55 |                                     else:
 56 |                                         line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"])
 57 | 
 58 |                                 for value in ["destination_ip", "destination_port", "url"]:
 59 |                                     line += "\"{0}\"{1}".format(item[value], self.options["separator"]) if (value in item) and (item[value] is not None) else "\"\"{0}".format(self.options["separator"])
 60 |                                 self.options["csv_files"]["network"].write("{0}\n".format(line[:-1]))
 61 | 
 62 |                             # TODO: Should we only add the host or host:port information?
 63 |                             traffic_objects.append("{0}:{1}".format(item["destination_ip"], item["destination_port"]))
 64 | 
 65 |                 # extract unique URLs across all sandbox reports
 66 |                 if "http_conversations" in data:
 67 |                     traffic = data["http_conversations"]
 68 |                     for item in traffic:
 69 |                         if item["url"] not in traffic_objects:
 70 |                             if self.options["csv"]:
 71 |                                 line = ""
 72 |                                 for value in ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"]:
 73 |                                     if value not in attributes:
 74 |                                         line += self.options["separator"] 
 75 |                                         continue
 76 |                                     
 77 |                                     if isinstance(getattr(sample, value), list):
 78 |                                         list_items = ""
 79 |                                         for list_item in getattr(sample, value):
 80 |                                             list_items += "{0}|".format(list_item)
 81 |                                         line += "\"{0}\"{1}".format(list_items[:-2], self.options["separator"])
 82 |                                     else:
 83 |                                         line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"])
 84 | 
 85 |                                 for value in ["destination_ip", "destination_port", "url"]:
 86 |                                     line += "\"{0}\"{1}".format(item[value], self.options["separator"]) if (value in item) and (item[value] is not None) else "\"\"{0}".format(self.options["separator"])
 87 |                                 self.options["csv_files"]["network"].write("{0}\n".format(line[:-1]))
 88 | 
 89 |                             traffic_objects.append(item["url"])
 90 | 
 91 |             # if network indicators were extracted, write the information to an indicator report
 92 |             # (unless it is not existing already)
 93 |             filename = os.path.join(self.options["info_dir"], "{0}.ioc".format(sample.id))
 94 |             if (len(traffic_objects) > 0) and (not os.path.exists(filename)): 
 95 |                 with open(filename, "a") as f:
 96 |                     [ f.write("{0}\n".format(item)) for item in traffic_objects ]
 97 |             elif (len(traffic_objects) > 0) and (os.path.exists(filename)):
 98 |                 self.options["auxiliary"].log("Network indicator report for sample already exists on disk: {0}".format(sample.id), level = "DEBUG")
 99 |             else:
100 |                 #self.options["auxiliary"].log("No network indicators found for sample: {0}".format(sample.id), level = "DEBUG")
101 |                 pass
102 | 
103 | 


--------------------------------------------------------------------------------
/vti_search.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # coding=utf-8
  3 | 
  4 | import sys 
  5 | import argparse
  6 | import os
  7 | import os.path
  8 | import asyncio
  9 | from datetime import datetime
 10 | from lib import auxiliary, vt
 11 | 
 12 | meta =      {
 13 |                 "title"     :   "VTISearch - VirusTotal Intelligence Search",
 14 |                 "note"      :   "Written by Stefan Voemel.",
 15 |                 "version"   :   "0.1.6",
 16 |             }
 17 | 
 18 | 
 19 | filenames = {
 20 |                 # INFO: file, url, and domain identifiers represent the type of a VirusTotal object
 21 |                 #       do not change these identifiers
 22 |                 "artifacts" :   "artifacts.txt",
 23 |                 "file"      :   "samples.csv",
 24 |                 "url"       :   "urls.csv",
 25 |                 "domain"    :   "domains.csv",
 26 |                 "network"   :   "network_iocs.csv",
 27 |             }
 28 | 
 29 | 
 30 | def get_header():
 31 |     print("\n{0} - Version {1}\n\n{2}".format(meta["title"], meta["version"], meta["note"]))
 32 |     print("{0}\n".format("-" * 90))
 33 | 
 34 | 
 35 | async def main():
 36 | 
 37 |     global options 
 38 |     
 39 |     # get the full path of the program module
 40 |     module_name = os.path.abspath(__file__)
 41 |     module_path = os.path.dirname(module_name)
 42 | 
 43 |     opt = argparse.ArgumentParser(epilog = get_header())
 44 | 
 45 |     opt.add_argument("-q", "--query", default="", dest="query",
 46 |         help="Run a VirusTotal Intelligence search query.")
 47 |     
 48 |     opt.add_argument("-l", "--limit", type=int, default=20, dest="limit",
 49 |         help="Limits the number of samples to return.")
 50 | 
 51 |     opt.add_argument("--logfile", type=str, default="log.txt", dest="log",
 52 |         help="Name of the log file.")
 53 |     
 54 |     opt.add_argument("--download-dir", type=str, default="", dest="download_dir",
 55 |         help="Name of the directory where retrieved information will be stored in.")
 56 |     
 57 |     opt.add_argument("-d", "--download", action="store_true", dest="download_samples",
 58 |         help="If set, also downloads samples from VirusTotal that are referenced in an Intelligence search.")
 59 |     
 60 |     opt.add_argument("-f", "--file", default="", dest="sample_file",
 61 |         help="Downloads samples that are referenced in a file.")
 62 | 
 63 |     opt.add_argument("--no-behavior", action="store_false", dest="download_behavior",
 64 |         help="If set, does not download behavior reports for samples.")
 65 |  
 66 |     opt.add_argument("-v", "--verbose", action="count", default=0, dest="verbose",
 67 |         help="If set, display verbose information about reports.\nUse -vvv to see detailed scan results.")
 68 |     opt.add_argument("-u", "--update-key", action="store_true", dest="update_api_key",
 69 |         help="If set, offers to enter a new API key.")
 70 |     
 71 |     opt.add_argument("-w", "--workers", type=int, default=5, dest="workers",
 72 |         help="Number of concurrent workers.")
 73 |    
 74 |     opt.add_argument("--csv", action="store_true", dest="csv",
 75 |         help="If set, display results as comma-separated values.")
 76 |     
 77 |     options = vars(opt.parse_args())
 78 |     options["separator"] = ","
 79 |     options["filenames"] = filenames
 80 | 
 81 |     if (len(options["query"]) == 0) and (len(options["sample_file"]) == 0):
 82 |         print("Please either specify a VirusTotal Intelligence search query (-q) or a file with sample hashes (-f).\n")
 83 |         sys.exit(-1)
 84 | 
 85 |     # create a new directory based on the current timestamp that will store all query- and 
 86 |     # download-related information
 87 |     if len(options["download_dir"]) == 0:
 88 |         timestamp = (datetime.now().timestamp())
 89 |         timestamp = datetime.fromtimestamp(timestamp).strftime("%Y%m%d_%H%M")
 90 | 
 91 |         options["download_dir"] = os.path.join(module_path, "downloads", timestamp)
 92 | 
 93 |     options["csv_dir"] = os.path.join(options["download_dir"], "csv")
 94 |     options["info_dir"] = os.path.join(options["download_dir"], "reports")
 95 |     options["samples_dir"] = os.path.join(options["download_dir"], "samples")
 96 |     options["reports_dir"] = os.path.join(options["download_dir"], "behavior")
 97 |     options["log"] = os.path.join(options["download_dir"], options["log"])
 98 | 
 99 |     # create directories if necessary
100 |     created = True
101 |     for directory in ["download_dir", "csv_dir", "info_dir", "samples_dir", "reports_dir"]:
102 |         try:
103 |             os.makedirs(options[directory])
104 |         except FileExistsError as err:
105 |             pass
106 |         except OSError as err:
107 |             print("Error while creating directory: {0}".format(err))
108 |             created = False
109 |     if not created: sys.exit(-1)
110 |     
111 |     helper = auxiliary.Auxiliary(options)
112 |     options["auxiliary"] = helper
113 | 
114 |     # get / save API key from / to the system keyring
115 |     options["virustotal"] = options["auxiliary"].process_api_key()
116 | 
117 |     # start interaction with the VirusTotal service
118 |     virustotal = vt.VirusTotal_Search(options)
119 | 
120 |     start_time = datetime.now()
121 |     tasks = []
122 |     # perform an Intelligence search (and download respective samples if indicated)
123 |     if len(options["query"]) > 0:
124 |         tasks.append(asyncio.create_task(virustotal.search()))
125 |        
126 |     # download samples that are referenced in a file
127 |     if (len(options["sample_file"]) > 0) and (os.path.isfile(options["sample_file"])):
128 |         if not options["download_samples"]:
129 |             options["download_samples"] = True
130 |             options["auxiliary"].log("Sample download is automatically enabled.\n", level = "WARNING")
131 | 
132 |         tasks.append(asyncio.create_task(virustotal.download_samples(options["sample_file"])))
133 |         
134 |     await asyncio.gather(*tasks)
135 |     for task in tasks:
136 |         task.cancel()
137 | 
138 | 
139 |     end_time = datetime.now()
140 |     options["auxiliary"].log("\nInformation saved to {0}.".format(options["download_dir"]))
141 |     options["auxiliary"].log("Operations completed in {0}.\n".format((end_time - start_time)))
142 | 
143 |     if options["csv"]: options["auxiliary"].close_csv_files()
144 | 
145 | 
if __name__ == "__main__":

    # the program relies on asyncio.run() and asyncio.create_task(), and thus
    # requires a Python 3 interpreter of version 3.7 or higher
    interpreter = sys.version_info
    if interpreter.major != 3 or interpreter.minor < 7:
        print("Attention: Python 3.7 or higher is required for this program.\nPlease upgrade your Python instance.\n")
        sys.exit(-1)

    asyncio.run(main())
154 | 


--------------------------------------------------------------------------------
/lib/auxiliary.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import sys
  5 | import os.path
  6 | import keyring
  7 | from datetime import datetime
  8 | import logging
  9 | LOGGING_FORMAT = "[%(levelname)s]\t%(asctime)s - %(message)s"
 10 | 
 11 | 
 12 | class Auxiliary():
 13 | 
 14 |     def __init__(self, options):
 15 | 
 16 |         self.options = options
 17 |         self.logfile = self.init_logger(options["log"])
 18 | 
 19 |         if self.options["csv"]: self.create_csv_files()
 20 | 
 21 |     
 22 |     def log(self, message, logger = None, level = "INFO"):
 23 |         if self.logfile == None:
 24 |             return
 25 | 
 26 |         if logger == None:
 27 |             logger = self.logfile
 28 | 
 29 |         if level.upper() == "INFO":
 30 |             logger.info(message)
 31 |         elif level.upper() == "WARNING":
 32 |             logger.warning(message)
 33 |         elif level.upper() == "ERROR":
 34 |             logger.error(message)
 35 |         elif level.upper() == "DEBUG":
 36 |             logger.debug(message)
 37 |         else:
 38 |             logger.info(message)
 39 | 
 40 | 
 41 |     def init_logger(self, logfile, formatting = "", write_mode = "w"):
 42 |         try:
 43 |             f = open(logfile, write_mode)
 44 |             f.close()
 45 |         except IOError:
 46 |             return None
 47 | 
 48 |         logger = logging.getLogger(logfile)
 49 |         logger.setLevel(logging.DEBUG)
 50 | 
 51 |         if formatting == "":
 52 |             formatting = LOGGING_FORMAT
 53 | 
 54 |         formatter = logging.Formatter(formatting)
 55 |         handler = logging.FileHandler(logfile)
 56 |         handler.setFormatter(formatter)
 57 |         handler.setLevel(logging.DEBUG)
 58 | 
 59 |         stream = logging.StreamHandler()
 60 |         stream.setLevel(logging.INFO)
 61 | 
 62 |         logger.addHandler(handler)
 63 |         logger.addHandler(stream)
 64 | 
 65 |         return logger
 66 | 
 67 | 
 68 |     def get_logger(self):
 69 |         return self.logfile
 70 | 
 71 | 
 72 |     def get_date(self):
 73 |         timestamp = (datetime.now().timestamp())
 74 |         
 75 |         return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d")
 76 | 
 77 | 
 78 |     def convert_timestamp(self, timestamp, format = "%Y-%m-%d %H:%M:%S", output_format = "%Y-%m-%d"):
 79 |         try:
 80 |             return datetime.strptime(timestamp, format).strftime(output_format)
 81 |         except TypeError:
 82 |             return None
 83 | 
 84 | 
 85 |     def process_api_key(self):
 86 |         """ Reads the VirusTotal API key from the system keyring (virustotal -> api_key).
 87 |             If it not stored yet, the user is prompted to provide her key.
 88 |         
 89 |             :return: The API key as a string
 90 |         """
 91 | 
 92 |         api_key = keyring.get_password('virustotal', 'api_key')
 93 |         message = ""
 94 |         if api_key is None:
 95 |             message = "VirusTotal API key is not yet stored in the system keyring."
 96 |         elif self.options["update_api_key"]:
 97 |             message = "The VirusTotal API key was requested to be updated."
 98 |         else:
 99 |             self.log("VirusTotal API key was read from the system keyring.", level = "DEBUG")
100 |             return api_key
101 | 
102 |         self.log("{0}\nPlease note that you must specify an API key that is valid for the (commercial) Private API in order to fully use this program.\n".format(message), level = "WARNING")
103 |         
104 |         while True:
105 |             try:
106 |                 key1  = input("Please enter the API key, or press Ctrl+C to abort:  ")
107 |                 key2  = input("Please verify the API key, or press Ctrl+C to abort: ")
108 |                 
109 |                 if key1.strip("\n ") == key2.strip("\n "):
110 |                     api_key = key1.strip("\n ")
111 |                     keyring.set_password("virustotal", "api_key", api_key)
112 |                     self.log("VirusTotal API key was saved to the system keyring.", level = "DEBUG")
113 |                     return api_key
114 |             except KeyboardInterrupt:
115 |                 self.log("\n\nAPI key not entered. Program aborted.\n")
116 |                 sys.exit(0)
117 | 
118 | 
119 |     def create_csv_header(self, filename, fields):
120 | 
121 |         try:
122 |             file_handle = open(filename, "w")
123 |             
124 |             line = "#"
125 |             for field in fields: line += "{0}{1}".format(field, self.options["separator"])
126 |             file_handle.write("{0}\n".format(line[:-1]))
127 |             
128 |             return file_handle
129 |         except IOError as err:
130 |             self.options["auxiliary"].log("CSV file could not be created: {0}".format(filename), level = "ERROR")
131 |             return None
132 | 
133 | 
134 |     def create_csv_files(self):
135 |         
136 |         # saves a dictionary of file handles to CSV files
137 |         self.options["csv_files"] = {}
138 |         for item in self.options["filenames"]:
139 |             filename = self.options["filenames"][item]
140 |             if not filename.endswith(".csv"): continue
141 |             
142 |             fields = []
143 |             # define header fields for each artifact type 
144 |             if self.options["verbose"] < 3:
145 |                 if item == "file":
146 |                     fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "First submitted on", "Last submitted on", "Times submitted", "Benign", "Malicious", "Suspicious", "Undetected"]
147 |                 elif item == "domain":
148 |                     fields = ["Domain", "Registrar", "Tags", "Created on", "Last modified", "Last updated", "Benign", "Malicious", "Suspicious", "Undetected"]
149 |                 elif item == "url":
150 |                     fields = ["URL", "Final URL", "Title", "Tags", "First submitted on", "Last submitted on", "Times submitted", "Benign", "Malicious", "Suspicious", "Undetected"]
151 |             else:
152 |                 if item == "file":
153 |                     fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "Vendor", "Signature", "Result", "Signature Database"]
154 |                 elif item == "domain":
155 |                     fields = ["Domain", "Registrar", "Tags", "Vendor", "Signature", "Result", "Signature Database"]
156 |                 elif item == "url":
157 |                     fields = ["URL", "Final URL", "Title", "Tags", "Vendor", "Signature", "Result", "Signature Database"]
158 | 
159 |             # network IOCs for a sample should be created regardless of the verbosity level
160 |             if item == "network":
161 |                 fields = ["SHA256", "MD5", "SHA1", "Vhash", "Size", "Type", "Tags", "Host", "Port", "URL"]
162 | 
163 |             filename = os.path.join(self.options["csv_dir"], filename)
164 |             file_handle = self.create_csv_header(filename, fields)
165 |             self.options["csv_files"][item] = file_handle
166 | 
167 | 
168 |     def close_csv_files(self):
169 | 
170 |         if "csv_files" not in self.options: return
171 |         
172 |         for filename in self.options["csv_files"]:
173 |             if self.options["csv_files"][filename] is not None: 
174 |                 self.options["csv_files"][filename].close()
175 | 
176 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # VTISearch - VirusTotal Intelligence Search
  2 | 
  3 | *VTISearch* is a small utility for running a VirusTotal Intelligence search query. A query can include powerful search modifiers (listed in the [documentation](https://support.virustotal.com/hc/en-us/articles/360001385897-File-search-modifiers)) that permit efficient threat research and hunting operations.
  4 | 
  5 | The program leverages v3 of the VirusTotal API. Please note that for Intelligence Search (and most other features of the program), you need a *private* API key, i.e., access to VirusTotal Enterprise. The API key is requested upon the first start and saved to the keyring of the system for security reasons.
  6 | 
  7 | By default, *VTISearch* retrieves information about the first 20 samples that are associated with the search query. However, results for up to 300 samples can be requested as well with the help of the `-l` (`--limit`) parameter.
  8 | 
  9 | Information includes the list of sample hashes (MD5, SHA1, SHA256, and - if existing - the VirusTotal *vhash* similarity hash), the type and size of the artifact, dates of (first and last) submission, and also detection statistics.
 10 | 
 11 | Additional details, e.g., scanning results per vendor, can be displayed when specifying the verbose (`-v`) parameter. Up to three different verbosity levels are supported.
 12 | 
 13 | *VTISearch* is capable of downloading the samples as well as behavioral (dynamic analysis) reports for an Intelligence search. Dynamic analysis reports are also automatically parsed in order to extract network-based Indicators of Compromise (IOCs). 
 14 | 
 15 | When using the `--csv` option, results can be exported in CSV format for subsequent import in, e.g., *Maltego* or other graph visualization programs.
 16 | 
 17 | 
 18 | ## Features
 19 | 
 20 | * Retrieves information for up to 300 artifacts (samples, domains, URLs) that are related to the search query.
 21 | * Information includes meta data as well as detailed scanning and detection results upon request.
 22 | * Supports the automatic download of associated samples and behavioral (dynamic analysis) reports.
 23 | * Behavioral reports are automatically scanned for network-based Indicators of Compromise (IOCs).
 24 | * Use of multiple workers to speed up operations.
 25 | * All information is categorized in different sub-folders. Detailed logs facilitate post-processing.
 26 | * Results can be exported in CSV format for subsequent relationship visualization with, e.g., Maltego.
 27 | 
 28 | 
 29 | ## Requirements and Installation
 30 | 
 31 | * Linux operating system (tested on Ubuntu 18.04)
 32 | 
 33 | * Python 3.7+
 34 | * pip3
 35 | * vt-py
 36 | * keyring
 37 | 
 38 | ### Notes on Python 3.7 and Ubuntu 18.04
 39 | 
 40 | 1. By default, Python 3.6 is still installed on Ubuntu 18.04. You can install version 3.7 (or 3.8) with
 41 | 
 42 | ```bash
 43 | $ sudo apt-get install python3.7
 44 | ```
 45 | 
 46 | and then change to the new version with\*
 47 | 
 48 | ```
 49 | $ sudo update-alternatives --config python3
 50 | ```
 51 | 
 52 | (\* In case you should get an error message that no alternative had been found, please run `sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1`.)
 53 | 
 54 | If you subsequently run `python3 --version` you should see the new version. Please note that you might have to reinstall respective packages for this version.
 55 | 
 56 | 
 57 | 2. Please reinstall pip3 with
 58 | 
 59 | ```
 60 | $ sudo apt-get install --reinstall python3-pip
 61 | ```
 62 | 
 63 | 
 64 | ### Package installation and Repository Cloning
 65 | 
 66 | 1. Once Python 3.7 is available, you can comfortably install all required packages as follows:
 67 | 
 68 | ```bash
 69 | $ sudo pip3 install vt-py keyring 
 70 | ```
 71 | 
 72 | (I am globally installing the packages in this example. Please feel free to set up a virtual environment instead if you prefer.)
 73 | 
 74 | 
 75 | 2. Clone the *VTISearch* repository, and start the program:
 76 | 
 77 | ```
 78 | $ git clone https://github.com/svo80/vti_search.git
 79 | 
 80 | $ cd vti_search && python3 vti_search.py -h
 81 | ```
 82 | 
 83 | 
 84 | ## Options and Usage
 85 | 
 86 | ```
 87 | usage: vti_search.py [-h] [-q QUERY] [-l LIMIT] [--logfile LOG]
 88 |                      [--download-dir DOWNLOAD_DIR] [-d] [-f SAMPLE_FILE]
 89 |                      [--no-behavior] [-v] [-u] [-w WORKERS] [--csv]
 90 | 
 91 | optional arguments:
 92 |   -h, --help                          Show this help message and exit
 93 | 
 94 |   -q QUERY, --query QUERY             Run a VirusTotal Intelligence search query.
 95 | 
 96 |   -l LIMIT, --limit LIMIT             Limits the number of samples to return.
 97 | 
 98 |   --logfile LOG                       Name of the log file.
 99 | 
100 |   --download-dir DOWNLOAD_DIR         Name of the directory where retrieved information will
101 |                                       be stored in.
102 | 
103 |   -d, --download                      If set, also downloads samples from VirusTotal that
104 |                                       are referenced in an Intelligence search.
105 | 
106 |   -f SAMPLE_FILE, --file SAMPLE_FILE  Downloads samples that are referenced in a file.
107 | 
108 |   --no-behavior                       If set, does not download behavior reports for
109 |                                       samples.
110 | 
111 |   -v, --verbose                       If set, display verbose information about reports. Use
112 |                                       -vvv to see detailed scan results.
113 | 
114 |   -u, --update-key                    If set, offers to enter a new API key.
115 | 
116 |   -w, --workers WORKERS               Number of concurrent workers.
117 | 
118 |   --csv                               If set, display results as comma-separated values.
119 | ```
120 | 
121 | In the majority of cases, *VTISearch* will be executed with the `-q` (`--query`) parameter. This query is sent to VirusTotal via the `v3` API. Respective samples will not be downloaded by default. However, this procedure can be easily activated with the `-d` parameter.
122 | 
123 | ```bash
124 | $ python3 vti_search.py -q "evil.exe" -d
125 | ```
126 | 
127 | Rather than performing an Intelligence search, it is also possible to process a list of hashes that are stored in a file. As such, the program can be used as a quick sample downloader and IOC processor:
128 | 
129 | ```bash
130 | $ python3 vti_search.py -f ./iocs.txt
131 | ```
132 | 
133 | The approaches can also be mixed. For instance, you might want to first check the results of a query slightly more in detail, adapt the list of samples in scope, and then re-run the program with the download option enabled for the updated sample list.
134 | 
135 | Alternatively, you might want to combine the results of an Intelligence search with indicators highlighted in a (third-party) report in order to create a more detailed overview of a specific campaign or operation.
136 | 
137 | By default, all log files, samples, and reports are stored in a separate directory (identified by its timestamp) that is created at program startup in the `downloads` folder. If you would rather update an existing directory, you can explicitly set the `--download-dir` parameter.
138 | 
139 | For instance, assuming you would like to investigate an APT campaign, you can perform an Intelligence search, retrieve the first 100 results in detailed format, and store all information in a specific folder as follows:
140 | 
141 | ```bash
142 | $ python3 vti_search.py -d -q <query> -l 100 -vvv --download-dir=downloads/apt
143 | ```
144 | 
145 | ## Sample Queries and Intelligence Searches
146 | 
147 | The following queries are solely for demonstration purposes to illustrate search capabilities and possible use cases for the program:
148 | 
149 | 1. Show samples with detection statistics that were submitted after May 1, 2020 and were detected by more than five but fewer than 10 vendors. 
150 | 
151 | ```bash
152 | $ python3 vti_search.py -q "ls:2020-05-01+ positives:5+ positives:10-" -v --no-behavior
153 | ```
154 | 
155 | 
156 | 2. Show PDF documents in German that were delivered as an email attachment and contain an embedded JavaScript.
157 | 
158 | ```
159 | $ python3 vti_search.py -q "tag:attachment type:pdf lang:german tag:js-embedded"
160 | ```
161 | 
162 | 
163 | 3. Show signed executables with a size of less than 300KB that were detected by more than five vendors.
164 | 
165 | ```bash
166 | $ python3 vti_search.py -q "size:300KB- positives:5+ tag:signed type:peexe"
167 | ```
168 | 
169 | 
170 | 4. Show up to five samples, representing Microsoft Office documents that execute code upon opening and likely set an AutoRun key for persistence.
171 | 
172 | ```bash
173 | $ python3 vti_search.py -q "behavior:'currentversion\run\' type:docx tag:auto-open" -l 5
174 | ```
175 | 
176 | 
177 | ## Data Export and Collaboration
178 | 
179 | *VTISearch* supports exporting all information in CSV format. Exported contents are dependent on the verbosity level.
180 | 
181 | For instance, when specifying the `-vvv` parameter, detailed anti-virus scanning reports will be exported into CSV format. On the other hand, when solely specifying the `-v` parameter, higher level summary statistics will be created.
182 | 
183 | The list of network indicators retrieved from dynamic analysis sandbox reports can be exported in CSV format as well. This information can subsequently be loaded with, e.g., [Maltego](https://www.maltego.com/) in order to visualize respective relationships.
184 | 
185 | 
186 | ## Example Run
187 | 
188 | ```bash
189 | $ python3 vti_search.py -d -q evil.exe -l 10 -vv
190 | 
191 | VTISearch - VirusTotal Intelligence Search - Version 0.1.0
192 | 
193 | Written by Stefan Voemel.
194 | ------------------------------------------------------------------------------------------
195 | 
196 | 2axxxxxxxxxe4b2be454ed0dxxxxxxxxxx7db18e9780xxxxxxxx10dcabxxxxxx
197 |   MD5:                        xxxxx09dxxxxxc271cxxxxx5cb6xxxxx
198 |   Sha1:                       xxxxxx71bxxxxx4aaxxxx383xxxxce8xxxxe00xx
199 |   VHash:                      xxx04xx5xdxx1xx8xxxx2txxxx
200 | 
201 |   Type:                       PE32 executable for MS Windows (GUI) Intel 80386 32-bit
202 |   Type Tag:                   peexe
203 |   Size:                       73802
204 | 
205 |   First submission:           2020-05-07 11:16:58
206 |   Last submission:            2020-05-07 11:16:58
207 |   Number of submissions:      1
208 |   Unique sources:             1
209 | 
210 |   Malicious:                  58
211 |   Suspicious:                 0
212 |   Undetected:                 14
213 | 
214 |   [Host]                      1xx.16.xxx.xxx:4444
215 | 
216 | 798xxxx29xxxx4xxxe3dxxxa8xfxx3x2excxxxe7xxc4cxxxd4x4fx4x05xxxxxx
217 |   MD5:                        xxxx27xxxx28xxxx14xxxb34xxx13xxx
218 |   Sha1:                       xxxxb6xxx1f4xxxxdb26xxxx94xxxx5dxx61cxxx
219 |   VHash:                      xxx03xxx7dxxx2xx
220 | 
221 |   Type:                       PE32 executable for MS Windows (console) Intel 80386 32-bit
222 |   Type Tag:                   peexe
223 |   Size:                       4752
224 | 
225 |   First submission:           2011-07-04 22:00:08
226 |   Last submission:            2020-05-06 13:39:21
227 |   Number of submissions:      1951
228 |   Unique sources:             1472
229 | 
230 |   Malicious:                  58
231 |   Suspicious:                 0
232 |   Undetected:                 14
233 | 
234 |   [Host]                      1xx.1xx.221.22:80
235 |   [Host]                      1xx.1xx.131.241:80
236 |   [Host]                      1xx.xxx.78.24:443
237 |   [Host]                      1xx.xxx.78.25:443
238 |   [URL]                       hxxp://www.xxxxxxxx.com/ad.html
239 | ```
240 | 
241 | 
242 | ## File Structure
243 | 
244 | ```bash
245 | ├── downloads                     Program data
246 | │   └── <timestamp>
247 | │       ├── artifacts.txt         List of artifacts that were in scope
248 | │       ├── behavior/             Directory for behavioral reports
249 | │       ├── csv                   Directory with CSV files
250 | │       │   ├── domains.csv       Exported domains (if existing)
251 | │       │   ├── network_iocs.csv  Exported network indicators
252 | │       │   ├── samples.csv       Exported samples (if existing)
253 | │       │   └── urls.csv          Exported URLs (if existing)
254 | │       ├── log.txt               Detailed log file with program runtime messages
255 | │       ├── reports/              Directory for summary reports and network indicators (*.ioc)
256 | │       │   ├── <sample>          Textual summary report for a sample
257 | │       │   ├── <sample.ioc>      Extracted network indicators for a sample
258 | │       │   ├── <sample.raw>      Static analysis report for a sample in JSON format
259 | │       ├── samples/              Directory for malware samples
260 | │   
261 | ├── lib                           Program libraries
262 | │   ├── auxiliary.py
263 | │   ├── sandboxes.py
264 | │   └── vt.py
265 | │   
266 | ├── README.md
267 | └── vti_search.py                 Main program file
268 | ```
269 | 
270 | 
271 | ## Comments and Additional Notes
272 | 
273 | I am not a professional developer or software engineer, and this program should be seen as a small helper tool. While I do enjoy periodically writing smaller utilities in my free time for Incident Response, malware analysis, and Threat Intelligence scenarios, I very rarely upload any of them. 
274 | 
275 | The only reason I did so for this program is that the number of alternatives for the v3 VirusTotal API is currently still very limited. That being said, I spend the vast majority of my time (i.e., my professional life) leading security teams and offering strategic advice and guidance on a higher level. As such, if you believe that the code is *\<beep\>*, you are probably right.
276 | 


--------------------------------------------------------------------------------
/lib/vt.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import vt
  4 | import sys
  5 | import os.path
  6 | import json
  7 | import re
  8 | import requests
  9 | import asyncio
 10 | 
 11 | from .artifact import Artifact
 12 | from .sandboxes import Sandbox_Parser
 13 | 
 14 | 
 15 | class VirusTotal_Search(Artifact):
 16 |     """ Provides a class for running a VirusTotal Intelligence search and processing respective
 17 |         results.
 18 | 
 19 |         By default, at max 300 results are returned per query.
 20 |     """
 21 | 
 22 |     def __init__(self, options):
 23 | 
 24 |         super().__init__(options)
 25 |         
 26 |         self.options = options
 27 |         self.auxiliary = options["auxiliary"]
 28 | 
 29 | 
 30 |         self.site = {
 31 |                         
 32 |                         "url"       :   "https://virustotal.com/api/v3/",
 33 |                         "header"    :   {
 34 |                                             "x-apikey"  :   self.options["virustotal"]
 35 |                                         }
 36 |                     }
 37 | 
 38 |         self.client = vt.Client(self.options["virustotal"])
 39 |         
 40 |         # TODO: consolidate queues
 41 |         self.sample_queue = asyncio.Queue()
 42 |         self.behavior_queue = asyncio.Queue()
 43 |         self.info_queue = asyncio.Queue()
 44 | 
 45 | 
 46 |     async def search(self):
 47 |         """ Executes a VirusTotal Intelligence search
 48 |         """
 49 |         
 50 |         async with vt.Client(self.options["virustotal"]) as client:
 51 |             self.options["auxiliary"].log("Running intelligence query: {0}".format(self.options["query"]))
 52 |             it = client.iterator('/intelligence/search',  params={'query': self.options["query"]}, limit=self.options["limit"])
 53 |             
 54 |             artifact_log = os.path.join(self.options["download_dir"], self.options["filenames"]["artifacts"])
 55 | 
 56 |             tasks = []
 57 |             asyncio.create_task(self.get_heartbeat())
 58 |             with open(artifact_log, "w") as f:
 59 |                 # iterate through the result set - each element represents a File object
 60 |                 try:
 61 |                     async for obj in it:
 62 |                         if obj.type not in ["file", "url", "domain"]:
 63 |                             self.options["auxiliary"].log("Warning: Unknown artifact type detected: {0} - {1:70}".format(obj.type, obj.id), level="WARNING")
 64 |                             continue
 65 |                         
 66 |                         # log the name / identifier of the artifact
 67 |                         if obj.type in ["file", "domain"]:
 68 |                             f.write("{0}\n".format(obj.id))
 69 |                         elif obj.type == "url":
 70 |                             f.write("{0} => {1}\n".format(obj.id, obj.url)) 
 71 | 
 72 |                         # for samples, request downloading the artifact and behavior report
 73 |                         if obj.type == "file":
 74 |                             if self.options["download_samples"]  : await self.sample_queue.put(obj)
 75 |                             if self.options["download_behavior"] : await self.behavior_queue.put(obj)
 76 |                         
 77 |                         # save the report summary
 78 |                         sample_report = os.path.join(self.options["info_dir"], obj.id)
 79 |                         super().display_information(obj, sample_report)
 80 |                 except vt.error.APIError as err:
 81 |                     
 82 |                     if err.code in ["AuthenticationRequiredError", "ForbiddenError", "UserNotActiveError", "WrongCredentialsError"]:
 83 |                         self.auxiliary.log("The API key is not valid for accessing the VirusTotal Private API, or there was a problem with the user account.", level = "ERROR")
 84 |                     elif err.code in ["QuotaExceededError", "TooManyRequestsError"]:
 85 |                         self.auxiliary.log("The quota for the API key or the number of issued requests has been exceeded.", level = "ERROR")
 86 |                     else:
 87 |                         self.auxiliary.log("There was an error while processing the request: {0}".format(err.code), level="ERROR")
 88 | 
 89 |                     return None
 90 | 
 91 |                     
 92 |                 for worker in range(self.options["workers"]):
 93 |                     if self.options["download_behavior"]: tasks.append(asyncio.create_task(self.get_behavior_report()))
 94 |                     if self.options["download_samples"]: tasks.append(asyncio.create_task(self.get_sample()))
 95 |                             
 96 |                 await asyncio.gather(*tasks)
 97 |                 await self.behavior_queue.join()
 98 |                 await self.sample_queue.join()
 99 |                 for task in tasks: task.cancel()
100 | 
101 | 
102 |     async def download_samples(self, filename):
103 |         """ Reads in a list of hashes from a file for subsequent sample download
104 | 
105 |             :param filename: The name of the file that contains the list of hashes
106 |         """
107 |         
108 |         md5 = re.compile(r"([a-fA-F\d]{32})")
109 |         sha1 = re.compile(r"([a-fA-F\d]{40})")
110 |         sha256 = re.compile(r"([a-fA-F\d]{64})")
111 | 
112 |         samples = []
113 |         asyncio.create_task(self.get_heartbeat())
114 |         with open(filename, "r") as f:
115 |             for data in f:
116 |                 data = data.strip("\n ")
117 |                 if md5.match(data) or sha1.match(data) or sha256.match(data):
118 |                     # if the entry in the file represents a sample by hash, and the 
119 |                     # sample is appearing for the first time, add it to the queue
120 |                     if data not in samples:
121 |                         await self.info_queue.put(data)
122 |                         samples.append(data)
123 | 
124 |         # retrieve summary information and check if the sample exists
125 |         tasks = []
126 |         for worker in range(self.options["workers"]):
127 |             result = tasks.append(asyncio.create_task(self.get_sample_info()))
128 |                     
129 |         results = await asyncio.gather(*tasks)
130 |         await self.info_queue.join()
131 |         for task in tasks: task.cancel()
132 | 
133 |         # download artifacts that are existing as well as corresponding behavior reports
134 |         for worker in results:
135 |             for sample in worker:
136 |                 if sample is not None:
137 |                     if self.options["download_samples"]  : await self.sample_queue.put(sample)
138 |                     if self.options["download_behavior"] : await self.behavior_queue.put(sample)
139 | 
140 |         tasks = []
141 |         for worker in range(self.options["workers"]):
142 |             if self.options["download_behavior"]: tasks.append(asyncio.create_task(self.get_behavior_report()))
143 |             if self.options["download_samples"]: tasks.append(asyncio.create_task(self.get_sample()))
144 |                     
145 |         await asyncio.gather(*tasks)
146 |         await self.behavior_queue.join()
147 |         await self.sample_queue.join()
148 |         for task in tasks: task.cancel()
149 | 
150 | 
151 |     async def execute_request(self, request):
152 |         """ Runs an asynchronous call to retreive a behavioral report from VirusTotal
153 |         
154 |             :param request: The API request to execute
155 | 
156 |             :return:        JSON output that is contained in the 'data' field
157 |         """
158 | 
159 |         async with vt.Client(self.options["virustotal"]) as client:
160 |             try:
161 |                 url = requests.compat.urljoin(self.site["url"], request)
162 |                 result = await client.get_json_async(url)
163 |                 
164 |                 if "data" not in result:
165 |                     raise ValueError("No valid JSON report received")
166 |                 
167 |                 return result["data"]
168 |             except vt.error.APIError as err:
169 |                 return None
170 |             except ValueError as err:
171 |                 self.options["auxiliary"].log("Behavior report for sample did not contain valid data: {0}".format(url))
172 |                 return None
173 | 
174 | 
175 |     async def get_heartbeat(self):
176 |         """ Periodically print a status message of the queue to indicate the number of pending tasks
177 |         """
178 | 
179 |         while True:
180 |             sys.stdout.write("\033[94m[Queue] Sample Reports: {0:03d} - Artifacts: {1:03d} - Behavior Reports: {2:03d}\033[0m\r".format(self.info_queue.qsize(), self.sample_queue.qsize(), self.behavior_queue.qsize()))
181 |             sys.stdout.flush()
182 |             await asyncio.sleep(1)
183 | 
184 | 
185 |     async def get_sample_info(self):
186 |         """ Retrieves summary information about a sample
187 |         """
188 |         
189 |         samples = []
190 |         async with vt.Client(self.options["virustotal"]) as client:
191 |             while not self.info_queue.empty():
192 |                 try:
193 |                     sample_id = await self.info_queue.get()
194 |                     path = os.path.join("/files", sample_id)
195 |                     
196 |                     # this call should be always performed to check if the sample exists
197 |                     # and get context information for a hash value
198 |                     result = await client.get_object_async(path)
199 | 
200 |                     sample_report = os.path.join(self.options["info_dir"], sample_id)
201 |                     super().display_information(result, sample_report)
202 | 
203 |                     samples.append(result)
204 |                 except vt.error.APIError as err:
205 |                     if err.code == "NotFoundError":
206 |                         self.options["auxiliary"].log("Sample was not found: {0}\n".format(sample_id), level = "WARNING")
207 |                         self.info_queue.task_done()
208 |                         continue
209 |                     elif err.code in ["AuthenticationRequiredError", "ForbiddenError", "UserNotActiveError", "WrongCredentialsError"]:
210 |                         self.auxiliary.log("The API key is not valid for accessing the VirusTotal Private API, or there was a problem with the user account.", level = "ERROR")
211 |                     elif err.code in ["QuotaExceededError", "TooManyRequestsError"]:
212 |                         self.auxiliary.log("The quota for the API key or the number of issued requests has been exceeded.", level = "ERROR")
213 |                     else:
214 |                         self.auxiliary.log("There was an error while processing the request: {0}".format(err.code), level="ERROR")
215 |                     
216 |                     # clear all remaining items in the queue
217 |                     while not self.info_queue.empty(): 
218 |                         await self.info_queue.get()
219 |                         self.info_queue.task_done()
220 | 
221 |                 self.info_queue.task_done()
222 | 
223 |         return samples
224 | 
225 | 
226 |     async def get_behavior_report(self):
227 |         """ Retrieves a behavior report from VirusTotal
228 |             (The behavior report can consist of a result list from multiple sandboxes)
229 | 
230 |             :return:            True if the report was successfully downloaded or was successfully
231 |                                 read from disk (if existing), otherwise False
232 |         """
233 | 
234 |         async with vt.Client(self.options["virustotal"]) as client:
235 |             while not self.behavior_queue.empty():
236 |                 sample = await self.behavior_queue.get()
237 |                 sample_id = sample if isinstance(sample, str) else sample.id
238 |                 
239 |                 # check if a sample object rather than a hash was provided
240 |                 report_file = os.path.join(self.options["reports_dir"], sample_id) 
241 |                 report_retrieved = False
242 | 
243 |                 # if the report file is not on disk yet, it is downloaded
244 |                 if not os.path.isfile(report_file):
245 |                     url = 'files/{0}/behaviours'.format(sample_id)
246 |                     result = await self.execute_request(url)
247 |                     
248 |                     if result is None:
249 |                         self.options["auxiliary"].log("Sample does not have a behavior report, or the report could not be retrieved: {0}".format(sample_id), level="ERROR")
250 |                         self.behavior_queue.task_done()
251 |                         continue
252 |                     try:
253 |                         with open(report_file, "w") as f:
254 |                             json.dump(result, f)
255 |         
256 |                         self.options["auxiliary"].log("Saved behaviorial report: {0}".format(report_file), level = "DEBUG")
257 |                         report_retrieved = True
258 |                     except IOError as err:
259 |                         self.options["auxiliary"].log("Error while saving behaviorial report: {0} - {1}".format(report_file, err), level = "ERROR")
260 |                 else:
261 |                     # the report has already been downloaded and is stored on disk
262 |                     self.options["auxiliary"].log("Behavior report for sample already exists on disk and is not downloaded again: {0}".format(sample_id), level = "DEBUG")
263 |             
264 |                     try:  
265 |                         with open(report_file, "r") as f:
266 |                             result = json.load(f)
267 | 
268 |                         report_retrieved = True
269 |                     except (IOError, json.JSONDecodeError) as err:
270 |                         self.options["auxiliary"].log("Error while reading behaviorial report: {0} - {1}".format(report_file, err), level = "ERROR")
271 | 
272 |                 if report_retrieved:
273 |                     sandbox = Sandbox_Parser(self.options, result)
274 |                     sandbox.parse_report(sample)
275 | 
276 |                 self.behavior_queue.task_done()
277 | 
278 | 
279 |     async def get_sample(self):
280 |         """ Downloads a sample from VirusTotal
281 | 
282 |             :param sample_id:   The id (hash value) of the sample
283 |             
284 |             :return:            True if the sample was successfully downloaded, otherwise False
285 |                                 (In case the sample already exists on disk, the return value
286 |                                 is also False)
287 |         """
288 |         
289 |         async with vt.Client(self.options["virustotal"]) as client:
290 |             while not self.sample_queue.empty():
291 |                 try:
292 |                     sample_id = await self.sample_queue.get()
293 |                     # check if a sample object rather than a hash was provided
294 |                     if not isinstance(sample_id, str): sample_id = sample_id.id
295 |                     
296 |                     sample_path = os.path.join(self.options["samples_dir"], sample_id)
297 |                     
298 |                     # if the file is already on disk, it is not downloaded again
299 |                     # TODO: Possibly check more than purely the filename to be sure the content was previously
300 |                     #       correctly downloaded as well?  
301 |                     if os.path.isfile(sample_path): 
302 |                         self.options["auxiliary"].log("Sample already exists on disk and is not downloaded again: {0}".format(sample_id), level = "DEBUG")
303 |                         self.sample_queue.task_done()
304 |                         continue
305 |                     
306 |                     # save the sample to disk
307 |                     with open(sample_path, "wb") as f:
308 |                         await client.download_file_async(sample_id, f)
309 |                         self.options["auxiliary"].log("Successfully downloaded sample: {0}".format(sample_id), level = "DEBUG")
310 | 
311 |                     self.sample_queue.task_done()
312 |                 except IOError as err:
313 |                     self.options["auxiliary"].log("Error while downloading sample: {0}".format(err), level = "ERROR")
314 |                     self.sample_queue.task_done()
315 | 


--------------------------------------------------------------------------------
/lib/artifact.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import os.path
  4 | import json
  5 | 
  6 | """
  7 |     Super class for displaying information about an artifact and / or saving the information to
  8 |     an artifact report file on disk.
  9 | 
 10 |     
 11 |     Keywords and description:
 12 |     https://developers.virustotal.com/v3.0/reference#files  
 13 | 
 14 |     - Hashes like md5, sha1 and sha256 that identifies it
 15 |     - size of the file
 16 |     - first_submission_date when the file was first received in VirusTotal (as a UNIX timestamp)
 17 |     - last_submission_date last time we received it (as a UNIX timestamp)
 18 |     - last_analysis_date last time we analysed it (as a UNIX timestamp)
 19 |     - last_modification_date last time the object itself was modified (as a UNIX timestamp)
 20 |     - times_submitted how many times VirusTotal had received it
 21 |     - last_analysis_results: result of the last analysis. 
 22 |         
 23 |         dict with AV name as key and a dict with notes/result from that scanner as value.
 24 |         category: normalized result. can be:
 25 |         
 26 |         - "harmless" (AV thinks the file is not malicious),
 27 |         - "undetected" (AV has no opinion about this file),
 28 |         - "suspicious" (AV thinks the file is suspicious),
 29 |         - "malicious" (AV thinks the file is malicious).
 30 | 
 31 |     - names we have seen the file with, being meaningful_name the one we consider more interesting
 32 |     - unique_sources indicates from how many different sources the file has been received
 33 | 
 34 | 
 35 |     In the attributes dictionary you are going to find also fields with information extracted from the file itself. We characterise the file and expose this information in the following keys:
 36 | 
 37 |     - type_description describe the type of file it is, being type_tag it short and you can use to search files of the same kind.
 38 |     - creation_date is extracted when possible from the file and indicates the timestamp the compilation or build tool give to it when created, it can be also faked by malware creators.
 39 |     - total_votes received from the VirusTotal community, each time a user vote a file it is reflected in this values. reputation field is calculated from the votes the file received and the users reputations credits.
 40 |     - vhash an in-house similarity clustering algorithm value, based on a simple structural feature hash allows you to find similar files
 41 |     - tags are extracted from different parts of the report and are labels that help you search similar samples
 42 | 
 43 |     Additionally VirusTotal together with each Antivirus scan runs a set of tool that allows us to collect more information about the file. All this tool information is included in the "attributes" key, together with the rest of fields previously described.
 44 | 
 45 | """
 46 | 
 47 | # Translation map: VirusTotal attribute / category names => labels shown in the program output
 48 | KEYWORD_MAP = {
 49 |         # file attributes
 50 |         "md5"                   :   "MD5",
 51 |         "sha1"                  :   "Sha1",
 52 |         "vhash"                 :   "VHash",
 53 |         "first_submission_date" :   "First submission",
 54 |         "last_submission_date"  :   "Last submission",
 55 |         "times_submitted"       :   "Number of submissions",
 56 |         "unique_sources"        :   "Unique sources",
 57 |         "size"                  :   "Size",
 58 |         "type_tag"              :   "Type",
 59 |         "tags"                  :   "Tag(s)",
 60 |         "magic"                 :   "File description",
 61 | 
 62 |         # domain attributes
 63 |         "creation_date"         :   "Creation date",
 64 |         "last_modification_date":   "Last modified",
 65 |         "last_update_date"      :   "Last updated",
 66 |         "registrar"             :   "Registrar",
 67 |         
 68 |         # url attributes
 69 |         "title"                 :   "Title",
 70 |         "last_final_url"        :   "Final URL",
 71 | 
 72 |         # attributes for scan results
 73 |         "harmless"              :   "Benign",
 74 |         "suspicious"            :   "Suspicious",
 75 |         "malicious"             :   "Malicious",
 76 |         "undetected"            :   "Undetected",
 77 |         "failure"               :   "Failure",
 78 |         "type-unsupported"      :   "Unsupported",
 79 | }
 80 | 
 81 | 
 82 | class Artifact():
 83 |     """ Base class for displaying information about an artifact and / or saving the
 84 |         information to an artifact report file on disk.
 85 | 
 86 |         By default, at max 300 results are returned per query.
 87 |     """
 88 | 
 89 |     def __init__(self, options):
 90 |         """ :param options: Program options; the logging helper is read from the key "auxiliary" """
 91 |         self.options = options
 92 |         self.auxiliary = options["auxiliary"]
 93 | 
 94 | 
 95 |     def display_scanning_results(self, sample, required_verbose_level = 0, file_handle = None):
 96 |         """ Displays scanning results per anti-virus vendor
 97 |             
 98 |             :param sample:                  The sample object
 99 |             :param required_verbose_level:  Displays results on screen if the verbose level
100 |                                             is high enough, otherwise only logs results to a file
101 |             :param file_handle:             If set, writes information to an artifact report file
102 |         """
103 | 
104 |         results = sample.last_analysis_results
105 |         for item in results:
106 |             engine = results[item]
107 |             
108 |             # category can be, e.g., suspicious, malicious, undetected, etc. 
109 |             category = KEYWORD_MAP[engine["category"]] if engine["category"] in KEYWORD_MAP else engine["category"]         
110 |             signature = engine["result"] if engine["result"] is not None else "--"
111 |             if len(signature) > 40: signature = "{0} (...)".format(signature[:40])
112 | 
113 |             if "engine_update" in engine and engine["engine_update"] is not None:
114 |                 signature_database = engine["engine_update"] 
115 |             else:
116 |                 signature_database = "--"
117 | 
118 |             string = "{0}{1:28}{2:47}{3:25}(Signature Database: {4})".format(" " * 2, engine["engine_name"], signature, category, signature_database)
119 |             if self.options["verbose"] >= required_verbose_level: print(string)
120 |             if file_handle is not None: file_handle.write("{0}\n".format(string))
121 | 
122 |             if self.options["csv"] and self.options["verbose"] >= 3:
123 |                 line = ""
124 |                 attributes = dir(sample)
125 | 
126 |                 if sample.type == "file":
127 |                     fields = ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags"]
128 |                 elif sample.type == "domain":
129 |                     fields = ["id", "registrar", "tags"]
130 |                 elif sample.type == "url":
131 |                     fields = ["url", "last_final_url", "title", "tags"]
132 |                 else:
133 |                     fields = []
134 | 
135 |                 for value in fields:
136 |                     if value not in attributes:
137 |                         line += self.options["separator"]
138 |                         continue
139 | 
140 |                     if isinstance(getattr(sample, value), list):
141 |                         list_items = ""
142 |                         for item in getattr(sample, value):
143 |                             list_items += "{0}|".format(item)
144 |                         line += "\"{0}\"{1}".format(list_items[:-1], self.options["separator"])
145 |                     else:
146 |                         line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"])
147 |                 for value in ["engine_name", "result", "category", "engine_update"]:
148 | 
149 |                     if value in engine and engine[value] is not None:
150 |                         line += "\"{0}\"{1}".format(engine[value], self.options["separator"]) 
151 |                     else:
152 |                         line += "\"\"{0}".format(self.options["separator"])
153 |                 
154 |                 self.options["csv_files"][sample.type].write("{0}\n".format(line[:-1]))
155 |                 
156 |         if self.options["verbose"] >= required_verbose_level: print()
157 |         if file_handle is not None: file_handle.write("\n")
158 | 
159 | 
160 |     def display_values(self, id_list, sample, filter_values = None, required_verbose_level = 0, file_handle = None):
161 |         """
162 |             :param id_list:                 List of attributes that should be processed
163 |             :param sample:                  The sample object
164 |             :param filter_values:           White list of values that should be exclusively considered
165 |                                             when parsing an attribute list
166 |             :param required_verbose_level:  Displays results on screen if the verbose level
167 |                                             is high enough, otherwise only logs results to a file
168 |             :param file_handle:             If set, writes information to an artifact report file
169 |         """
170 | 
171 |         for value in id_list:
172 |             if value not in dir(sample): continue
173 |             
174 |             if isinstance(getattr(sample, value), dict):
175 |                 for item in getattr(sample, value):
176 |                     if filter_values is not None and isinstance(filter_values, list):
177 |                         if item not in filter_values: continue
178 | 
179 |                     label = KEYWORD_MAP[item] if item in KEYWORD_MAP else item
180 | 
181 |                     string = "{0}{1:28}{2}".format(" " * 2, label + ":", getattr(sample, value)[item])
182 |                     if self.options["verbose"] >= required_verbose_level: print(string)
183 |                     if file_handle is not None: file_handle.write("{0}\n".format(string))
184 |             elif isinstance(getattr(sample, value), list):
185 |                 line = ""
186 |                 for item in getattr(sample, value):
187 |                     line += "{0}, ".format(item)
188 |                 label = KEYWORD_MAP[value] if value in KEYWORD_MAP else value
189 | 
190 |                 string = "{0}{1:28}{2}".format(" " * 2, label + ":", line[:-2])
191 |                 if self.options["verbose"] >= required_verbose_level: print(string)
192 |                 if file_handle is not None: file_handle.write("{0}\n".format(string))
193 |             else:
194 |                 label = KEYWORD_MAP[value] if value in KEYWORD_MAP else value
195 |                 string = "{0}{1:28}{2}".format(" " * 2, label + ":", getattr(sample, value))
196 |                 if self.options["verbose"] >= required_verbose_level: print(string)
197 |                 if file_handle is not None:  file_handle.write("{0}\n".format(string))
198 | 
199 |         if self.options["verbose"] >= required_verbose_level:  print("")
200 |         if file_handle is not None: file_handle.write("\n")
201 | 
202 | 
203 |     def display_information(self, sample, filename = None):
204 |         """
205 |             Displays information about an artifact that was returned as part of a search query.
206 |             Displayed information is dependent on the artifact type.
207 | 
208 |             :param sample:   Sample object (type: file, domain, url)
209 |             :param filename: Name of a report file
210 |         """
211 | 
212 |         identifier = ""
213 |         if sample.type in ["file", "domain"]:
214 |             # INFO: For domains, the identifier is the domain name
215 |             #       This appears to be okay, as for unicode characters an internationalized domain
216 |             #       name is returned which should not cause any conflict with the file system level
217 |             # TODO: check this with dedicated tests
218 |             identifier = sample.id
219 |         elif sample.type == "url":
220 |             identifier = sample.url
221 |         else:
222 |             self.options["auxiliary"].log("Unknown sample type detected: {0} - {1}".format(sample.type, sample.id), level="WARNING")
223 |         print("{0:80}".format(identifier))
224 | 
225 |         # write the summary information to disk if a filename was provided and the report
226 |         # does not exist yet, otherwise only log but do not rewrite
227 |         file_handle = None
228 |         if (filename is not None) and (not os.path.exists(filename)): 
229 |             file_handle = open(filename, "w")
230 |             file_handle.write("{0}\n".format(identifier))
231 |         elif (filename is not None) and (os.path.exists(filename)):
232 |             self.options["auxiliary"].log("Summary report for the sample already exists on disk and is not downloaded again: {0}".format(sample.id), level = "DEBUG")
233 |         
234 |         # write the raw report to disk if a filename was provided and the report
235 |         # does not exist yet, otherwise only log but do not rewrite
236 |         raw_filename = "{0}.raw".format(filename)
237 |         if (filename is not None) and (not os.path.exists(raw_filename)):
238 |             try:
239 |                 with open(raw_filename, "w") as f:
240 |                     json.dump(sample.to_dict(), f)
241 |             except (IOError, TypeError) as err:
242 |                 self.options["auxiliary"].log("There was an error while saving the raw report to disk for sample: {0} - {1}".format(sample.id, err), level="ERROR")
243 |         elif (filename is not None) and (os.path.exists(raw_filename)):
244 |             self.options["auxiliary"].log("The raw report for the sample already exists on disk and is not downloaded again: {0}".format(sample.id), level = "DEBUG")
245 | 
246 | 
247 |         if self.options["csv"] and self.options["verbose"] < 3:
248 |             line = ""
249 |             attributes = dir(sample)
250 | 
251 |             # determine output fields by artifact type
252 |             fields = []
253 |             if sample.type == "file":
254 |                 fields = ["sha256", "md5", "sha1", "vhash", "size", "type_tag", "tags", "first_submission_date", "last_submission_date", "times_submitted"]
255 |             elif sample.type == "domain":
256 |                 fields = ["id", "registrar", "tags", "creation_date", "last_modification_date", "last_update_date"]
257 |             elif sample.type == "url":
258 |                 fields = ["url", "last_final_url", "title", "tags", "first_submission_date", "last_submission_date", "times_submitted"]
259 |             else:
260 |                 fields = []
261 | 
262 |             for value in fields: 
263 |                 if value not in attributes:
264 |                     line += self.options["separator"]
265 |                     continue
266 | 
267 |                 if isinstance(getattr(sample, value), list):
268 |                     list_items = ""
269 |                     for item in getattr(sample, value):
270 |                         list_items += "{0}|".format(item)
271 |                     line += "\"{0}\"{1}".format(list_items[:-1], self.options["separator"])
272 |                 else:
273 |                     line += "\"{0}\"{1}".format(getattr(sample, value), self.options["separator"])
274 | 
275 |             for value in ["harmless", "malicious", "suspicious", "undetected"]:
276 |                 if (("last_analysis_stats" in attributes) and (value in sample.last_analysis_stats.keys())):
277 |                     line += "\"{0}\"{1}".format(sample.last_analysis_stats[value], self.options["separator"])
278 |                 else:
279 |                     line += "\"{0}\"".format(self.options["separator"])
280 | 
281 |             self.options["csv_files"][sample.type].write("{0}\n".format(line[:-1]))
282 |       
283 |         # verbose level 1
284 |         if sample.type == "file":
285 |             values = ["md5", "sha1", "vhash"]
286 |         elif sample.type == "domain":
287 |             values = ["creation_date", "last_modification_date", "last_update_date"]
288 |         elif sample.type == "url":
289 |             values = ["last_final_url", "title"]
290 |         else:
291 |             values = []
292 |         self.display_values(values, sample, required_verbose_level = 1, file_handle = file_handle)
293 |         
294 |         values = ["magic", "type_tag", "tags", "size"]
295 |         self.display_values(values, sample, required_verbose_level = 1, file_handle = file_handle)
296 | 
297 |         # verbose level 2
298 |         if sample.type in ["file", "url"]:
299 |             values = ["first_submission_date", "last_submission_date", "times_submitted", "unique_sources"]
300 |         elif sample.type == "domain":
301 |             values = ["registrar"]
302 |         else:
303 |             values = []
304 |         self.display_values(values, sample, required_verbose_level = 2, file_handle = file_handle)
305 |    
306 |         values = ["last_analysis_stats"]
307 |         self.display_values(values, sample, ["harmless", "malicious", "suspicious", "undetected"], required_verbose_level = 1, file_handle = file_handle)
308 | 
309 |         # verbose level 3
310 |         self.display_scanning_results(sample, required_verbose_level = 3, file_handle = file_handle)
311 | 
312 |         if file_handle is not None: 
313 |             file_handle.close()
314 |             self.options["auxiliary"].log("Saved summary report: {0}".format(filename), level = "DEBUG")
315 | 
316 | 
317 | 
318 | 


--------------------------------------------------------------------------------