--help' for more information on a specific module.
84 | """
85 | # Everything starts here
86 | pass
87 |
88 | # The OSINT module -- This is the primary module that does all the stuff
89 | # Basic, required arguments
90 | @odin.command(name='osint',short_help="The full OSINT suite of tools will be run (see README).")
91 | @click.option('-o','--organization',help='The target client, such as "ABC Company," to use for \
92 | report titles and searches for domains and cloud storage buckets.',required=True)
93 | @click.option('-d','--domain',help="The target's primary domain, such as example.com. Use \
94 | whatever the target uses for email and their main website. Provide additional domains in a scope \
95 | file using --scope-file.",required=True)
96 | # Optional arguments
97 | @click.option('-sf','--scope-file',type=click.Path(exists=True,readable=True,\
98 | resolve_path=True),help="A text file containing additional domain names you want to include. IP \
99 | addresses can also be provided, if necessary. List each one on a new line.",required=False)
100 | @click.option('--whoxy-limit',default=10,help="The maximum number of domains discovered via \
101 | reverse WHOIS that ODIN will resolve and use when searching services like Censys and Shodan. \
102 | You may get hundreds of results from reverse WHOIS, so this is intended to save time and \
103 | API credits. The default is 10 domains; setting it much above 20 or 30 is not recommended. \
104 | It is preferable to perform a search using a tool like Vincent Yiu's DomLink and then provide \
105 | the newly discovered domains in your scope file with --scope-file.")
106 | @click.option('--typo',is_flag=True,help="Generate a list of lookalike domain names for the \
107 | provided domain (--domain), check if they have been registered, and then check those domains \
108 | against URLVoid and Cymon.io to see if the domains or associated IP addresses have been \
109 | flagged as malicious.")
110 | # File searching arguments
111 | @click.option('--files',is_flag=True,help="Use Google to search for files under the provided \
112 | domain (--domain), download the files, and extract metadata.")
113 | @click.option('-e','--ext',default="all",help="File extensions to look for with --files. \
114 | Default is 'all' or you can pick from key, pdf, doc, docx, xls, xlsx, and ppt.")
115 | # Cloud-related arguments
116 | @click.option('-w','--aws',help="A list of additional keywords to be used when searching for \
117 | cloud storage buckets.",type=click.Path(exists=True,readable=True,resolve_path=True))
118 | @click.option('-wf','--aws-fixes',help="A list of strings to be added to the start and end of \
119 | the cloud storage bucket names.",type=click.Path(exists=True,readable=True,resolve_path=True))
120 | # Reporting-related arguments
121 | @click.option('--html',is_flag=True,help="Create an HTML report at the end for easy browsing.")
122 | @click.option('--graph',is_flag=True,help="Create a Neo4j graph database from the completed \
123 | SQLite3 database.")
124 | @click.option('--nuke',is_flag=True,help="Clear the Neo4j project before converting the \
125 | database. This is only used with --graph.")
126 | @click.option('--screenshots',is_flag=True,help="Attempt to take screenshots of discovered \
127 | web services.")
128 | @click.option('--unsafe',is_flag=True,help="Adding this flag will spawn the headless Chrome \
129 | browser with the --no-sandbox command line flag. This is NOT recommended for any users who are \
130 | NOT running ODIN on a Kali Linux VM as root. Chrome will not run as the root user on Kali \
131 | without this option.")
132 | # Pass the above arguments on to your osint function
133 | @click.pass_context
134 |
135 | def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html,
136 | screenshots,graph,nuke,whoxy_limit,typo,unsafe):
137 | """
138 | The OSINT toolkit:
139 |
140 | This is ODIN's primary module. ODIN will take the target organization, domain, and other data
141 | provided and hunt for information. On the human side, ODIN looks for employee names,
142 | email addresses, and social media profiles. Names and emails are cross-referenced with
143 | HaveIBeenPwned, Twitter's API, and search engines to collect additional information.
144 |
145 | ODIN also uses various tools and APIs to collect information on the provided IP addresses
146 | and domain names, including things like DNS and IP address history.
147 |
148 | View the wiki for the full details, reporting information, and lists of API keys.
149 |
150 | Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:
151 |
152 | * Single Address: 8.8.8.8
153 |
154 | * Basic CIDR: 8.8.8.0/24
155 |
156 | * Nmap-friendly Range: 8.8.8.8-10
157 |
158 | * Underscores? OK: 8.8.8.8_8.8.8.10
159 | """
160 | click.clear()
161 | click.secho(asciis.print_art(),fg="magenta")
162 | click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
163 | click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
164 | # Perform prep work for reporting
165 | setup_reports(organization)
166 | report_path = "reports/{}/".format(organization)
167 | output_report = report_path + "OSINT_DB.db"
168 | if __name__ == "__main__":
169 | # Create manager server to handle variables shared between jobs
170 | manager = Manager()
171 | ip_list = manager.list()
172 | domain_list = manager.list()
173 | rev_domain_list = manager.list()
174 | # Create reporter object and generate lists of everything, just IP addresses, and just domains
175 | browser = helpers.setup_headless_chrome(unsafe)
176 | report = reporter.Reporter(organization,report_path,output_report,browser)
177 | report.create_tables()
178 | scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
179 | # Create some jobs and put Python to work!
180 | # Job queue 1 is for the initial phase
181 | jobs = []
182 | # Job queue 2 is used for jobs using data from job queue 1
183 | more_jobs = []
184 | # Job queue 3 is used for jobs that take a while and use the progress bar, e.g. AWS enumeration
185 | even_more_jobs = []
186 | # Phase 1 jobs
187 | company_info = Process(name="Company Info Collector",
188 | target=report.create_company_info_table,
189 | args=(domain,))
190 | jobs.append(company_info)
191 | employee_report = Process(name="Employee Hunter",
192 | target=report.create_people_table,
193 | args=(domain_list,rev_domain_list,organization))
194 | jobs.append(employee_report)
195 | domain_report = Process(name="Domain and IP Hunter",
196 | target=report.create_domain_report_table,
197 | args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
198 | jobs.append(domain_report)
199 | # Phase 2 jobs
200 | shodan_report = Process(name="Shodan Hunter",
201 | target=report.create_shodan_table,
202 | args=(ip_list,domain_list))
203 | more_jobs.append(shodan_report)
204 | if typo:
205 | lookalike_report = Process(name="Lookalike Domain Reviewer",
206 | target=report.create_lookalike_table,
207 | args=(organization,domain))
208 | more_jobs.append(lookalike_report)
209 | if screenshots:
210 | take_screenshots = Process(name="Screenshot Snapper",
211 | target=report.capture_web_snapshots,
212 | args=(report_path,browser))
213 | more_jobs.append(take_screenshots)
214 | if files:
215 | files_report = Process(name="File Hunter",
216 | target=report.create_metadata_table,
217 | args=(domain,ext,report_path))
218 | more_jobs.append(files_report)
219 | # Phase 3 jobs
220 | cloud_report = Process(name="Cloud Hunter",
221 | target=report.create_cloud_table,
222 | args=(organization,domain,aws,aws_fixes))
223 | even_more_jobs.append(cloud_report)
224 | # Process the lists of jobs in phases, starting with phase 1
225 | click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
226 | for job in jobs:
227 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
228 | job.start()
229 | for job in jobs:
230 | job.join()
231 | # Wait for phase 1 and then begin phase 2 jobs
232 | click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
233 | for job in more_jobs:
234 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
235 | job.start()
236 | for job in more_jobs:
237 | job.join()
238 | # Wait for phase 2 and then begin phase 3 jobs
239 | click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
240 | for job in even_more_jobs:
241 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
242 | job.start()
243 | for job in even_more_jobs:
244 | job.join()
245 | # All jobs are done, so close out the SQLite3 database connection
246 | report.close_out_reporting()
247 | click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
248 | any SQLite browser.".format(output_report),fg="green")
249 | # Perform additional tasks depending on the user's command line options
250 | if graph:
251 | graph_reporter = grapher.Grapher(output_report)
252 | click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
253 | if nuke:
254 | if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
255 | fresh start. Proceed?",fg="red"),default=True):
256 | try:
257 | graph_reporter.clear_neo4j_database()
258 | click.secho("[+] Database successfully wiped!\n",fg="green")
259 | except Exception as error:
260 | click.secho("[!] Failed to clear the database! Check the Neo4j console and \
261 | your configuration and try running grapher.py again.",fg="red")
262 | click.secho("L.. Details: {}".format(error),fg="red")
263 | else:
264 | click.secho("[!] You can convert your database to a graph database later. \
265 | Run lib/grapher.py with the appropriate options.",fg="red")
266 | try:
267 | graph_reporter.convert()
268 | except Exception as error:
269 | click.secho("[!] Failed to convert the database! Check the Neo4j console and \
270 | your configuration and try running grapher.py again.",fg="red")
271 | click.secho("L.. Details: {}".format(error),fg="red")
272 | if html:
273 | click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
274 | try:
275 | html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
276 | html_reporter.generate_full_report()
277 | except Exception as error:
278 | click.secho("[!] Failed to create the HTML report!",fg="red")
279 | click.secho("L.. Details: {}".format(error),fg="red")
280 |
281 | # The VERIFY module -- No OSINT, just a way to check ownership of a list of IPs
282 | @odin.command(name='verify',short_help="This module assists with verifying ownership of a list \
283 | of IP addresses. This returns a CSV file with SSL cert, WHOIS, and other data for verification.")
284 | @click.option('-o','--organization',help='The target client, such as "ABC Company," to use for \
285 | report titles and some keyword searches.',required=True)
286 | @click.option('-sf','--scope-file',help="Name of the file with your IP addresses.",\
287 | type=click.Path(exists=True,readable=True,resolve_path=True),required=True)
288 | @click.option('-r','--report',default="Verification.csv",help="Output file (CSV) for the \
289 | findings.")
290 | # Pass the above arguments on to your verify function
291 | @click.pass_context
292 |
293 | def verify(self,organization,scope_file,report):
294 | """
295 | The Verify module:
296 |
297 | Uses reverse DNS, ARIN, and SSL/TLS certificate information to help you verify ownership of a
298 | list of IP addresses.
299 |
300 | This is only for verifying IP addresses. Domains may not have public ownership information
301 | available. Compare the IP ownership information from ARIN and certificate information to what
302 | you know about the presumed owner to determine ownership.
303 |
304 | Acceptable IP addresses/ranges include:
305 |
306 | * Single Address: 8.8.8.8
307 |
308 | * Basic CIDR: 8.8.8.0/24
309 |
310 | * Nmap-friendly Range: 8.8.8.8-10
311 |
312 | * Underscores? OK: 8.8.8.8_8.8.8.10
313 | """
314 | click.secho(asciis.print_art(),fg="magenta")
315 | click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
316 | click.secho("[+] Scope Verification Module Selected: ODIN will attempt to verify who owns \
317 | the provided IP addresses.",fg="green")
318 | setup_reports(organization)
319 | report_path = "reports/{}/{}".format(organization,report)
320 | expanded_scope = []
321 | results = {}
322 | try:
323 | verification.prepare_scope(scope_file,expanded_scope)
324 | verification.perform_whois(expanded_scope,results)
325 | verification.print_output(results,report_path)
326 | except Exception as error:
327 | click.secho("[!] Verification failed!",fg="red")
328 | click.secho("L.. Details: {}".format(error),fg="red")
329 | click.secho("[+] Job's done! Your identity report is in {}.".format(report_path),fg="green")
330 |
331 | if __name__ == "__main__":
332 | odin()
333 |
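
For reference, here is a minimal sketch of how the CLI defined above might be driven programmatically, using Click's built-in test runner instead of a shell. The module name (odin) and the scope file name (scope.txt) are assumptions for illustration; the scope file must exist because of the click.Path(exists=True) check, and the verify command performs live lookups when invoked.

    # Hypothetical sketch -- not part of odin.py. Assumes the CLI above is importable
    # as `odin` and that scope.txt exists (one domain or IP entry per line, using any
    # of the formats listed in the docstrings, e.g. 8.8.8.0/24 or 8.8.8.8-10).
    from click.testing import CliRunner

    from odin import odin  # the Click group the commands above are registered on

    runner = CliRunner()
    # Roughly equivalent to: python3 odin.py verify -o "ABC Company" -sf scope.txt
    result = runner.invoke(odin, ["verify", "-o", "ABC Company", "-sf", "scope.txt"])
    print(result.output)
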
--------------------------------------------------------------------------------
/lib/subdomains.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | This module contains everything needed to hunt for subdomains, including collecting certificate
6 | data from Censys.io and crt.sh for a given domain name.
7 |
8 | The original crt.sh code is from PaulSec's unofficial crt.sh API. That project can be
9 | found here:
10 |
11 | https://github.com/PaulSec/crt.sh
12 | """
13 |
14 | import re
15 | import json
16 | import base64
17 | from time import sleep
18 |
19 | import click
20 | import requests
21 | import censys.certificates
22 | from bs4 import BeautifulSoup
23 |
24 | from . import helpers
25 |
26 |
27 | class CertSearcher(object):
28 | """Class for searching crt.sh and Censys.io for certificates and parsing the results."""
29 | # Set a timeout, in seconds, for the web requests
30 | requests_timeout = 10
31 | # The user-agent and endpoint URIs used for the web requests
32 | crtsh_base_uri = "https://crt.sh/?q={}&output=json"
33 | user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
34 |
35 | def __init__(self):
36 | """Everything that should be initiated with a new object goes here."""
37 | try:
38 | censys_api_id = helpers.config_section_map("Censys")["api_id"]
39 | censys_api_secret = helpers.config_section_map("Censys")["api_secret"]
40 | self.censys_cert_search = censys.certificates.CensysCertificates(api_id=censys_api_id,api_secret=censys_api_secret)
41 | except censys.base.CensysUnauthorizedException:
42 | self.censys_cert_search = None
43 | click.secho("[!] Censys reported your API information is invalid, so Censys searches will be skipped.",fg="yellow")
44 | click.secho("L.. You provided ID %s & Secret %s" % (censys_api_id,censys_api_secret),fg="yellow")
45 | except Exception as error:
46 | self.censys_cert_search = None
47 | click.secho("[!] Did not find a Censys API ID/secret.",fg="yellow")
48 | click.secho("L.. Details: {}".format(error),fg="yellow")
49 |
50 | def search_crtsh(self,domain,wildcard=True):
51 | """Collect certificate information from crt.sh for the target domain name. This returns
52 | a list of JSON objects with certificate information that includes the issuer, the entry and
53 | validity timestamps, and the certificate's name.
54 |
55 | Parameters:
56 | domain Domain to search for on crt.sh
57 | wildcard Whether or not to prepend a wildcard to the domain (default: True)
58 |
59 | Return a list of objects, like so:
60 | {
61 | "issuer_ca_id": 16418,
62 | "issuer_name": "C=US, O=Let's Encrypt, CN=Let's Encrypt Authority X3",
63 | "name_value": "hatch.uber.com",
64 | "min_cert_id": 325717795,
65 | "min_entry_timestamp": "2018-02-08T16:47:39.089",
66 | "not_before": "2018-02-08T15:47:39"
67 | }
68 | """
69 | headers = {"User-Agent":self.user_agent}
70 | if wildcard:
71 | domain = "%25.{}".format(domain)
72 | try:
73 | req = requests.get(self.crtsh_base_uri.format(domain),headers=headers,timeout=self.requests_timeout)
74 | if req.ok:
75 | try:
76 | content = req.content.decode("utf-8")
77 | data = json.loads("[{}]".format(content.replace('}{','},{')))
78 | return data
79 | except:
80 | pass
81 | except requests.exceptions.Timeout:
82 | click.secho("\n[!] The connection to crt.sh timed out!",fg="red")
83 | except requests.exceptions.TooManyRedirects:
84 | click.secho("\n[!] The connection to crt.sh encountered too many redirects!",fg="red")
85 | except requests.exceptions.RequestException as error:
86 | click.secho("\n[!] The connection to crt.sh encountered an error!",fg="red")
87 | click.secho("L.. Details: {}".format(error),fg="red")
88 | return None
89 |
90 | def search_censys_certificates(self,target):
91 | """Collect certificate information from Censys for the target domain name. This returns
92 | a dictionary of certificate information that includes the issuer, subject, and a hash
93 | Censys uses for the /view/ API calls to fetch additional information.
94 |
95 | A Censys API key is required.
96 |
97 | Parameters:
98 | target The domain name, e.g. apple.com, to be looked up on Censys.
99 | """
100 | if self.censys_cert_search is None:
101 | pass
102 | else:
103 | try:
104 | # Use the `parsed.names` filter to avoid unwanted domains
105 | query = "parsed.names: %s" % target
106 | results = self.censys_cert_search.search(query,fields=['parsed.names',
107 | 'parsed.signature_algorithm.name','parsed.signature.self_signed',
108 | 'parsed.validity.start','parsed.validity.end','parsed.fingerprint_sha256',
109 | 'parsed.subject_dn','parsed.issuer_dn'])
110 | return results
111 | except censys.base.CensysRateLimitExceededException:
112 | click.secho("\n[!] Censys reports your account has run out of API credits.",fg="red")
113 | return None
114 | except Exception as error:
115 | click.secho("\n[!] Error collecting Censys certificate data for {}.".format(target),fg="red")
116 | click.secho("L.. Details: {}".format(error),fg="red")
117 | return None
118 |
119 | def parse_cert_subdomain(self,subject_dn):
120 | """Accepts the Censys certificate data and parses the individual certificate's domain.
121 |
122 | Parameters:
123 | subject_dn Accepts the subject_dn field from a Censys search result.
124 | """
125 | if "," in subject_dn:
126 | pos = subject_dn.find('CN=')+3
127 | else:
128 | pos = 3
129 | tmp = subject_dn[pos:]
130 | if "," in tmp:
131 | pos = tmp.find(",")
132 | tmp = tmp[:pos]
133 | return tmp
134 |
135 | def filter_subdomains(self,domain,subdomains):
136 | """Filter out uninteresting domains that may be returned from certificates. These are
137 | domains unrelated to the true target. For example, a search for blizzard.com on Censys
138 | can return iran-blizzard.ir, an unwanted and unrelated domain.
139 |
140 | Credit to christophetd for this nice bit of code:
141 | https://github.com/christophetd/censys-subdomain-finder/blob/master/censys_subdomain_finder.py#L31
142 |
143 | Parameters:
144 | domain The base domain to be used for filtering subdomains, e.g. apple.com
145 | subdomains A list of collected subdomains to filter
146 | """
147 | return [ subdomain for subdomain in subdomains if '*' not in subdomain and subdomain.endswith(domain) ]
148 |
149 |
150 | class SubdomainCollector(object):
151 | """Class for scraping DNS Dumpster and NetCraft to discover subdomains."""
152 | # Set a timeout, in seconds, for the web requests
153 | requests_timeout = 10
154 | # The user-agent and endpoint URIs used for the web requests
155 | dnsdumpster_uri = "https://dnsdumpster.com/"
156 | findsubdomains_uri = "https://findsubdomains.com/subdomains-of/{}"
157 | netcraft_uri = "http://searchdns.netcraft.com/?host={}"
158 | netcraft_history_uri = "http://toolbar.netcraft.com/site_report?url={}"
159 | user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
160 |
161 | def __init__(self,webdriver=None):
162 | """Everything that should be initiated with a new object goes here.
163 |
164 | Parameters:
165 | webdriver A selenium webdriver object to be used for automated web browsing
166 | """
167 | self.browser = webdriver
168 | self.browser.set_page_load_timeout(10)
169 |
170 | def check_dns_dumpster(self,domain):
171 | """Collect subdomains known to DNS Dumpster for the provided domain. This is based on
172 | PaulSec's unofficial DNS Dumpster API available on GitHub.
173 |
174 | Parameters:
175 | domain The domain to search for on DNS Dumpster
176 | """
177 | results = {}
178 | cookies = {}
179 | # Disable SSL warnings and create a session for web browsing
180 | requests.packages.urllib3.disable_warnings()
181 | session = requests.session()
182 | # Try connecting to DNS Dumpster
183 | # This is all in one try/except because request 1 must succeed for request 2 to work
184 | try:
185 | # Make a request to stash the CSRF token and setup cookies and headers for the next request
186 | request = session.get(self.dnsdumpster_uri,verify=False,timeout=self.requests_timeout)
187 | csrf_token = session.cookies['csrftoken']
188 | cookies['csrftoken'] = session.cookies['csrftoken']
189 | headers = {"Referer": self.dnsdumpster_uri}
190 | data = {"csrfmiddlewaretoken": csrf_token,"targetip":domain}
191 | # Now make a POST to DNS Dumpster with the new cookies and headers to perform the search
192 | request = session.post(self.dnsdumpster_uri,cookies=cookies,data=data,headers=headers,timeout=self.requests_timeout)
193 | # Check if a 200 OK was returned
194 | if request.ok:
195 | soup = BeautifulSoup(request.content,"lxml")
196 | tables = soup.findAll("table")
197 | results['domain'] = domain
198 | results['dns_records'] = {}
199 | results['dns_records']['dns'] = self._retrieve_results(tables[0])
200 | results['dns_records']['mx'] = self._retrieve_results(tables[1])
201 | results['dns_records']['txt'] = self._retrieve_txt_record(tables[2])
202 | results['dns_records']['host'] = self._retrieve_results(tables[3])
203 | # Try to fetch the network mapping image
204 | try:
205 | val = soup.find('img',attrs={'class': 'img-responsive'})['src']
206 | tmp_url = "{}{}".format(self.dnsdumpster_uri,val)
207 | image_data = base64.b64encode(requests.get(tmp_url,timeout=self.requests_timeout).content)
208 | except Exception:
209 | image_data = None
210 | finally:
211 | results['image_data'] = image_data
212 | else:
213 | click.secho("\n[!] The DNS Dumpster request returned a {} status code!".format(request.status_code),fg="red")
214 | except requests.exceptions.Timeout:
215 | click.secho("\n[!] The connection to DNS Dumpster timed out!",fg="red")
216 | except requests.exceptions.TooManyRedirects:
217 | click.secho("\n[!] The connection to DNS Dumpster encountered too many redirects!",fg="red")
218 | except requests.exceptions.RequestException as error:
219 | click.secho("\n[!] The connection to DNS Dumpster encountered an error!",fg="red")
220 | click.secho("L.. Details: {}".format(error),fg="red")
221 | return results
222 |
223 | def _retrieve_results(self,table):
224 | """Used by check_dns_dumpster() to extract the results from the HTML.
225 |
226 | Parameters:
227 | table The HTML table pulled from DNS Dumpster results
228 | """
229 | results = []
230 | trs = table.findAll('tr')
231 | for tr in trs:
232 | tds = tr.findAll('td')
233 | pattern_ip = r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'
234 | ip = re.findall(pattern_ip,tds[1].text)[0]
235 | domain = tds[0].text.replace('\n','').split(' ')[0]
236 | header = ' '.join(tds[0].text.replace('\n','').split(' ')[1:])
237 | reverse_dns = tds[1].find('span',attrs={}).text
238 | additional_info = tds[2].text
239 | country = tds[2].find('span',attrs={}).text
240 | autonomous_system = additional_info.split(' ')[0]
241 | provider = ' '.join(additional_info.split(' ')[1:])
242 | provider = provider.replace(country,'')
243 | data = {'domain':domain,
244 | 'ip':ip,
245 | 'reverse_dns':reverse_dns,
246 | 'as':autonomous_system,
247 | 'provider':provider,
248 | 'country':country,
249 | 'header':header}
250 | results.append(data)
251 | return results
252 |
253 | def _retrieve_txt_record(self,table):
254 | """Used by check_dns_dumpster() to extracts the domain's DNS TXT records.
255 |
256 | Parameters:
257 | table The HTML table pulled from DNS Dumpster results
258 | """
259 | results = []
260 | for td in table.findAll('td'):
261 | results.append(td.text)
262 | return results
263 |
264 | def check_netcraft(self,domain):
265 | """Collect subdomains known to NetCraft for the provided domain. NetCraft blocks scripted
266 | requests by requiring cookies and JavaScript for all browsers, so Selenium is required.
267 |
268 | This is based on code from the DataSploit project, but updated to work with today's
269 | NetCraft and Python 3.
270 |
271 | Parameters:
272 | domain The domain to look up on NetCraft
273 | """
274 | results = []
275 | target_dom_name = domain.split(".")
276 | self.browser.get(self.netcraft_uri.format(domain))
277 | link_regx = re.compile(r'<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">')  # Pattern follows the DataSploit original; may need updating for current NetCraft markup
278 | links_list = link_regx.findall(self.browser.page_source)
279 | for x in links_list:
280 | dom_name = x.split("/")[2].split(".")
281 | if (dom_name[len(dom_name) - 1] == target_dom_name[1]) and \
282 | (dom_name[len(dom_name) - 2] == target_dom_name[0]):
283 | results.append(x.split("/")[2])
284 | num_regex = re.compile('Found (.*) site')
285 | num_subdomains = num_regex.findall(self.browser.page_source)
286 | if not num_subdomains:
287 | num_regex = re.compile('First (.*) sites returned')
288 | num_subdomains = num_regex.findall(self.browser.page_source)
289 | if num_subdomains:
290 | if num_subdomains[0] != str(0):
291 | num_pages = int(num_subdomains[0]) // 20 + 1
292 | if num_pages > 1:
293 | last_regex = re.compile(
294 | r'<td align="left">\s*<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">%s.</a></td>' % (20))
295 | last_item = last_regex.findall(self.browser.page_source)[0].split("/")[2]
296 | next_page = 21
297 | for x in range(2,num_pages):
298 | url = "http://searchdns.netcraft.com/?host=%s&last=%s&from=%s&restriction=/site%%20contains" % (domain,last_item,next_page)
299 | self.browser.get(url)
300 | link_regx = re.compile(
301 | r'<td align="left">\s*<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">')
302 | links_list = link_regx.findall(self.browser.page_source)
303 | for y in links_list:
304 | dom_name1 = y.split("/")[2].split(".")
305 | if (dom_name1[len(dom_name1) - 1] == target_dom_name[1]) and \
306 | (dom_name1[len(dom_name1) - 2] == target_dom_name[0]):
307 | results.append(y.split("/")[2])
308 | last_item = links_list[len(links_list) - 1].split("/")[2]
309 | next_page = 20 * x + 1
310 | else:
311 | pass
312 | return results
313 |
314 | def fetch_netcraft_domain_history(self,domain):
315 | """Fetch a domain's IP address history from NetCraft.
316 |
317 | Parameters:
318 | domain The domain to look up on NetCraft
319 | """
320 | # TODO: See if the "Last Seen" and other data can be easily collected here
321 | ip_history = []
322 | sleep(1)
323 | self.browser.get(self.netcraft_history_uri.format(domain))
324 | soup = BeautifulSoup(self.browser.page_source,'html.parser')
325 | urls_parsed = soup.findAll('a',href=re.compile(r".*netblock\?q.*"))
326 | for url in urls_parsed:
327 | if urls_parsed.index(url) != 0:
328 | result = [str(url).split('=')[2].split(">")[1].split("<")[0],\
329 | str(url.parent.findNext('td')).strip("<td>").strip("</td>")]
330 | ip_history.append(result)
331 | return ip_history
332 |
333 | def query_subdomainof(self,domain):
334 | """Look-up the given domain on findsubdomains.com and parse the results to get a list of
335 | subdomains.
336 |
337 | Parameters:
338 | domain The base domain for the subdomains query
339 | """
340 | subdomains = []
341 | headers = { 'User-Agent': self.user_agent }
342 | request = requests.get(self.findsubdomains_uri.format(domain),headers=headers,timeout=self.requests_timeout)
343 | soup = BeautifulSoup(request.content,"lxml")
344 | subdomain_links = soup.findAll('a',{'class': 'aggregated-link'})
345 | for subdomain in subdomain_links:
346 | if subdomain.string.strip() != domain:
347 | subdomains.append(subdomain.string.strip())
348 | unique_subdomains = list(set(subdomains))
349 | return unique_subdomains
350 |
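
To show how the two classes above are meant to fit together, a hypothetical usage sketch follows. It is not part of subdomains.py; it assumes the package layout shown in the file headers, Censys API credentials in the keys config, and a Selenium webdriver returned by helpers.setup_headless_chrome().

    # Hypothetical usage sketch -- not part of subdomains.py.
    from lib import helpers
    from lib.subdomains import CertSearcher, SubdomainCollector

    domain = "example.com"
    searcher = CertSearcher()

    candidates = set()
    # crt.sh returns a list of dicts (or None); "name_value" holds the certificate's name
    crtsh_results = searcher.search_crtsh(domain, wildcard=True)
    if crtsh_results:
        for cert in crtsh_results:
            candidates.add(cert["name_value"])

    # Censys results are keyed by the requested fields; parse_cert_subdomain() pulls the CN
    # out of a subject_dn string, e.g. "C=US, O=Example, CN=mail.example.com" -> "mail.example.com"
    censys_results = searcher.search_censys_certificates(domain)
    if censys_results:
        for result in censys_results:
            candidates.add(searcher.parse_cert_subdomain(result["parsed.subject_dn"]))

    # Drop wildcard entries and anything that does not end with the target domain
    subdomain_names = searcher.filter_subdomains(domain, list(candidates))

    # DNS Dumpster and NetCraft scraping need a browser, so pass in a Selenium webdriver
    # (the boolean mirrors odin.py's --unsafe flag)
    browser = helpers.setup_headless_chrome(False)
    collector = SubdomainCollector(browser)
    dumpster_data = collector.check_dns_dumpster(domain)
    netcraft_hosts = collector.check_netcraft(domain)
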
--------------------------------------------------------------------------------