"""Example usage of the PyFunnels library."""
from PyFunnels import PyFunnels

# A Funnel created without sources can still report which data points
# each registered tool supports.
PyF = PyFunnels.Funnel()
capabilities = PyF.get_capabilities()
print(capabilities)

# Specify the output file for each tool in a dictionary.
# NOTE: every entry needs a trailing comma; without it the adjacent string
# literals are concatenated and the dict literal becomes a SyntaxError.
source_files = {
    "spiderfoot": "/path/to/file/spiderfoot.db",
    "nmap": "/path/to/file/nmap_results.xml",
    "TheHarvester": "/path/to/file/theharvester-tester.xml",
}
# Create a PyFunnels object.
PyF = PyFunnels.Funnel(source_files)
# Do something with it.
domains = PyF.funnel_data("domains")
# funnel_data returns None when no data was found, so fall back to an
# empty iterable instead of iterating None.
for d in domains or ():
    pass  # Your use case here.
# ---------------------------------------------------------------------------
# PyFunnels/PyF_nmap.py
# ---------------------------------------------------------------------------
import xml.etree.ElementTree as ET


class PyFNmap:
    """Collect data points from an Nmap XML results file.

    Each capability method appends what it finds, de-duplicated, to the
    matching ``list_*`` attribute and returns nothing.
    """

    CAPABILITIES = ['domains', 'ips', 'tcp_sockets', 'udp_sockets']

    def __init__(self,
                 file,
                 list_domains=None,
                 list_ips=None,
                 list_tcp_sockets=None,
                 list_udp_sockets=None
                 ):
        """Parse ``file`` and prepare the result lists.

        Args:
            file: XML document to read; passed straight to ``ET.parse``.
            list_domains, list_ips, list_tcp_sockets, list_udp_sockets:
                Optional existing list to extend in place. ``None`` (the
                default) gives the instance its own fresh list, so results
                are never shared between instances.

        Raises:
            Whatever ``ET.parse`` raises for an unreadable or malformed file.
        """
        self.file = file
        self.list_domains = [] if list_domains is None else list_domains
        self.list_ips = [] if list_ips is None else list_ips
        self.list_tcp_sockets = [] if list_tcp_sockets is None else list_tcp_sockets
        self.list_udp_sockets = [] if list_udp_sockets is None else list_udp_sockets
        self.tree = ET.parse(self.file)
        self.root = self.tree.getroot()

    def domains(self):
        """Append the ``name`` of every ``<hostname>`` element to ``list_domains``."""
        for hostname in self.root.iter('hostname'):
            host = hostname.attrib.get("name")
            # De-duplicate against the domain list itself (not the IP list).
            if host is not None and host not in self.list_domains:
                self.list_domains.append(host)

    def ips(self):
        """Append the ``addr`` of every IP ``<address>`` element to ``list_ips``."""
        for address in self.root.iter('address'):
            # Only "ipv*" address types are kept; this avoids MAC addresses.
            if "ipv" in address.attrib.get("addrtype", ""):
                ip = address.attrib.get("addr")
                if ip is not None and ip not in self.list_ips:
                    self.list_ips.append(ip)

    def tcp_sockets(self):
        """Append ``"addr:port"`` for every open TCP port to ``list_tcp_sockets``."""
        self._collect_open_sockets("tcp", self.list_tcp_sockets)

    def udp_sockets(self):
        """Append ``"addr:port"`` for every open UDP port to ``list_udp_sockets``."""
        self._collect_open_sockets("udp", self.list_udp_sockets)

    @staticmethod
    def _host_address(host):
        """Return the host's IP address, else its first address, else None."""
        addresses = host.findall('address')
        for address in addresses:
            if "ipv" in address.attrib.get("addrtype", ""):
                return address.attrib.get("addr")
        return addresses[0].attrib.get("addr") if addresses else None

    def _collect_open_sockets(self, protocol, storage):
        """Append ``"addr:portid"`` for each open ``protocol`` port to ``storage``.

        Elements are located by tag name rather than by position, so a host
        with additional children (for example a second ``<address>``) is
        still handled.
        """
        for host in self.root.findall('host'):  # direct children of the root
            address = self._host_address(host)
            ports = host.find('ports')
            if address is None or ports is None:
                continue
            for port in ports.findall('port'):
                if port.attrib.get("protocol") != protocol:
                    continue
                state = port.find('state')
                portid = port.attrib.get("portid")
                if state is None or portid is None:
                    continue
                if state.attrib.get("state") != "open":
                    continue
                socket = address + ":" + portid
                if socket not in storage:
                    storage.append(socket)


# ---------------------------------------------------------------------------
# PyFunnels/PyF_photon.py
# ---------------------------------------------------------------------------
import os


class PyFPhoton:
    """Collect data points from the text files in a Photon output directory."""

    CAPABILITIES = ['domains', 'emails']

    def __init__(self,
                 file,
                 list_domains=None,
                 list_emails=None
                 ):
        """Remember the output location and prepare the result lists.

        Args:
            file: Directory holding the output files (it is listed with
                ``os.listdir``). A trailing path separator is optional.
            list_domains, list_emails: Optional existing list to extend in
                place; ``None`` gives the instance its own fresh list.
        """
        self.file = file
        self.list_domains = [] if list_domains is None else list_domains
        self.list_emails = [] if list_emails is None else list_emails

    def _read_entries(self, filename):
        """Yield the stripped, non-empty lines of ``filename`` if it exists.

        Raises whatever ``os.listdir`` raises when the directory is missing.
        """
        if filename not in os.listdir(self.file):
            return
        # os.path.join works whether or not self.file ends with a separator.
        with open(os.path.join(self.file, filename), 'r') as handle:
            for line in handle:
                entry = line.strip()
                if entry:
                    yield entry

    def domains(self):
        """Append each unique line of ``subdomains.txt`` to ``list_domains``."""
        for domain in self._read_entries("subdomains.txt"):
            if domain not in self.list_domains:
                self.list_domains.append(domain)

    def emails(self):
        """Append each unique line of ``intel.txt`` containing ``@`` to ``list_emails``."""
        for email in self._read_entries("intel.txt"):
            if "@" in email and email not in self.list_emails:
                self.list_emails.append(email)


# ---------------------------------------------------------------------------
# PyFunnels/PyF_recon_ng.py
# ---------------------------------------------------------------------------
import sqlite3


class PyFReconng:
    """Collect data points from a recon-ng SQLite database."""

    BASE_SQL_QUERY = "SELECT {} from {}"
    CAPABILITIES = ['domains', 'ips', 'emails']

    def __init__(self,
                 file,
                 list_domains=None,
                 list_ips=None,
                 list_emails=None
                 ):
        """Remember the database path and prepare the result lists.

        Args:
            file: Database path, passed to ``sqlite3.connect`` on each query.
            list_domains, list_ips, list_emails: Optional existing list to
                extend in place; ``None`` gives the instance its own list.
        """
        self.file = file
        self.list_domains = [] if list_domains is None else list_domains
        self.list_ips = [] if list_ips is None else list_ips
        self.list_emails = [] if list_emails is None else list_emails

    def _get_results_from_db(self, col, table, storage_attribute):
        """Append the unique, non-NULL values of ``table.col`` to ``storage_attribute``.

        ``col`` and ``table`` are formatted into the SQL text, so they must
        be trusted identifiers (the callers below pass constants only).
        """
        conn = sqlite3.connect(self.file)
        try:
            for row in conn.execute(self.BASE_SQL_QUERY.format(col, table)):
                value = row[0]
                if value is not None and value not in storage_attribute:
                    storage_attribute.append(value)
        finally:
            # Close the connection even when the query fails.
            conn.close()

    def domains(self):
        """Collect ``hosts.host`` into ``list_domains``."""
        self._get_results_from_db("host", "hosts", self.list_domains)

    def ips(self):
        """Collect ``hosts.ip_address`` into ``list_ips``."""
        self._get_results_from_db("ip_address", "hosts", self.list_ips)

    def emails(self):
        """Collect ``contacts.email`` into ``list_emails``."""
        self._get_results_from_db("email", "contacts", self.list_emails)


# ---------------------------------------------------------------------------
# PyFunnels/PyF_spiderfoot.py
# ---------------------------------------------------------------------------
import sqlite3


class PyFSpiderfoot:
    """Collect data points from the ``tbl_scan_results`` table of a SpiderFoot SQLite database."""

    BASE_SQL_QUERY = "SELECT data from tbl_scan_results WHERE `type` = '{}'"
    CAPABILITIES = ['domains', 'ips', 'emails', 'tcp_sockets', 'udp_sockets', 'urls']

    def __init__(self,
                 file,
                 list_domains=None,
                 list_ips=None,
                 list_emails=None,
                 list_tcp_sockets=None,
                 list_udp_sockets=None,
                 list_urls=None
                 ):
        """Remember the database path and prepare the result lists.

        Args:
            file: Database path, passed to ``sqlite3.connect`` on each query.
            list_domains, list_ips, list_emails, list_tcp_sockets,
            list_udp_sockets, list_urls: Optional existing list to extend in
                place; ``None`` gives the instance its own fresh list.
        """
        self.file = file
        self.list_domains = [] if list_domains is None else list_domains
        self.list_ips = [] if list_ips is None else list_ips
        self.list_emails = [] if list_emails is None else list_emails
        self.list_tcp_sockets = [] if list_tcp_sockets is None else list_tcp_sockets
        self.list_udp_sockets = [] if list_udp_sockets is None else list_udp_sockets
        self.list_urls = [] if list_urls is None else list_urls

    def _get_results_from_db(self, category_names, storage_attribute):
        """Append unique, non-NULL ``data`` values of the given row types.

        Args:
            category_names: Iterable of ``type`` values to select, queried in
                order. They are formatted into the SQL text, so they must be
                trusted constants (as they are in the methods below).
            storage_attribute: List extended in place.
        """
        conn = sqlite3.connect(self.file)
        try:
            for category in category_names:
                for row in conn.execute(self.BASE_SQL_QUERY.format(category)):
                    value = row[0]
                    if value is not None and value not in storage_attribute:
                        storage_attribute.append(value)
        finally:
            # Close the connection even when a query fails.
            conn.close()

    def domains(self):
        """Collect INTERNET_NAME and SIMILARDOMAIN rows into ``list_domains``."""
        domain_col_names = ["INTERNET_NAME", "SIMILARDOMAIN"]
        self._get_results_from_db(domain_col_names, self.list_domains)

    def ips(self):
        """Collect AFFILIATE_IPADDR and IP_ADDRESS rows into ``list_ips``."""
        ip_col_categories = [
            "AFFILIATE_IPADDR",
            "IP_ADDRESS",
        ]
        self._get_results_from_db(ip_col_categories, self.list_ips)

    def emails(self):
        """Collect EMAILADDR rows into ``list_emails``."""
        email_col_categories = [
            "EMAILADDR",
        ]
        self._get_results_from_db(email_col_categories, self.list_emails)

    def tcp_sockets(self):
        """Collect TCP_PORT_OPEN rows into ``list_tcp_sockets``."""
        tcp_port_col_categories = [
            "TCP_PORT_OPEN",
        ]
        self._get_results_from_db(tcp_port_col_categories, self.list_tcp_sockets)

    def udp_sockets(self):
        """Collect UDP_PORT_OPEN rows into ``list_udp_sockets``."""
        udp_port_col_categories = [
            "UDP_PORT_OPEN",
        ]
        self._get_results_from_db(udp_port_col_categories, self.list_udp_sockets)

    def urls(self):
        """Collect LINKED_URL_INTERNAL and URL_STATIC rows into ``list_urls``."""
        url_col_categories = [
            "LINKED_URL_INTERNAL",
            "URL_STATIC",
        ]
        self._get_results_from_db(url_col_categories, self.list_urls)


# ---------------------------------------------------------------------------
# PyFunnels/PyF_theharvester.py
# ---------------------------------------------------------------------------
import xml.etree.ElementTree as ET


class PyFtheHarvester:
    """Collect data points from a theHarvester XML results file."""

    CAPABILITIES = ['domains', 'ips', 'emails']

    def __init__(self,
                 file,
                 list_domains=None,
                 list_ips=None,
                 list_emails=None
                 ):
        """Parse ``file`` and prepare the result lists.

        Args:
            file: XML document to read; passed straight to ``ET.parse``.
            list_domains, list_ips, list_emails: Optional existing list to
                extend in place; ``None`` gives the instance its own list.

        Raises:
            Whatever ``ET.parse`` raises for an unreadable or malformed file.
        """
        self.file = file
        self.list_domains = [] if list_domains is None else list_domains
        self.list_ips = [] if list_ips is None else list_ips
        self.list_emails = [] if list_emails is None else list_emails
        self.tree = ET.parse(self.file)
        self.root = self.tree.getroot()

    def _collect_host_field(self, tag, storage):
        """Append the text of child ``tag`` of each top-level ``<host>`` to ``storage``.

        ``findtext`` returns None when the child is absent, so hosts lacking
        the child are skipped instead of raising AttributeError.
        """
        for host in self.root.findall('host'):
            value = host.findtext(tag)
            if value and value not in storage:
                storage.append(value)

    def domains(self):
        """Collect ``<host><hostname>`` texts into ``list_domains``."""
        self._collect_host_field('hostname', self.list_domains)

    def ips(self):
        """Collect ``<host><ip>`` texts into ``list_ips``."""
        self._collect_host_field('ip', self.list_ips)

    def emails(self):
        """Collect the text of each top-level ``<email>`` into ``list_emails``."""
        for element in self.root.findall('email'):
            email = element.text
            if email and email not in self.list_emails:
                self.list_emails.append(email)
from PyFunnels.PyF_nmap import PyFNmap
from PyFunnels.PyF_photon import PyFPhoton
from PyFunnels.PyF_recon_ng import PyFReconng
from PyFunnels.PyF_spiderfoot import PyFSpiderfoot
from PyFunnels.PyF_theharvester import PyFtheHarvester


class Funnel:
    """Aggregate one kind of data point across the output of several tools."""

    # Maps a lower-case tool name to the class that reads that tool's output.
    REGISTERED_DATA_SOURCES = {
        'spiderfoot': PyFSpiderfoot,
        'recon_ng': PyFReconng,
        'theharvester': PyFtheHarvester,
        'nmap': PyFNmap,
        'photon': PyFPhoton
    }

    def __init__(self, sources=None):
        """Initialize the data sources.

        Args:
            sources: Optional dict mapping a tool name (matched
                case-insensitively against ``REGISTERED_DATA_SOURCES``) to
                the location of that tool's output. Omit it when the object
                is only used for ``get_capabilities``.
        """
        # Always define both attributes so funnel_data works (and simply
        # finds nothing) on an object created without sources.
        self.sources = sources if sources else {}
        # Output locations of the sources whose tool is supported. The name
        # is lower-cased here to agree with the lookup in funnel_data.
        self.data_sources = [
            location
            for tool, location in self.sources.items()
            if tool.lower() in self.REGISTERED_DATA_SOURCES
        ]

    def funnel_data(self, data_point):
        """Aggregate the output of one or more tools.

        Args:
            data_point: Name of the data point to collect, e.g. ``"domains"``.
                Tools whose ``CAPABILITIES`` do not list it are skipped
                without reading their output.

        Returns:
            A set with the de-duplicated values from every capable source,
            or ``None`` when nothing was found.

        Raises:
            KeyError: If a configured tool name is not registered.
        """
        storage_attribute = set()
        for tool, location in self.sources.items():
            try:
                source_class = self.REGISTERED_DATA_SOURCES[tool.lower()]
            except KeyError:
                raise KeyError(
                    "Unsupported data source: {!r}".format(tool)
                ) from None
            if data_point not in source_class.CAPABILITIES:
                continue
            data_source_object = source_class(location)
            # The capability method fills the matching list_<data_point>
            # attribute on the tool object.
            getattr(data_source_object, data_point)()
            storage_attribute.update(
                getattr(data_source_object, "list_" + data_point)
            )
        if storage_attribute:
            return storage_attribute
        return None

    def get_capabilities(self):
        """Return a dict mapping each registered tool name to its ``CAPABILITIES`` list."""
        return {
            '{0}'.format(tool): source_class.CAPABILITIES
            for tool, source_class in self.REGISTERED_DATA_SOURCES.items()
        }
Instantiate an object. 21 | 3. Use a method on the object. 22 | 4. Expose normalized data. 23 | 24 | PyFunnels has been purposely structured for ease of use and extensibility to new tools and data points. Users of the library are encouraged to contribute code for new tools and data points they find useful. Whenever a user creates Python 3 code to isolate data from the output of a tool, they are encouraged to commit that code to PyFunnels so others in the community can use it as well. 25 | --------------------------------------------------------------------------------