├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── core └── __init__.py ├── namescraper.py ├── requirements.txt └── scrapers ├── __init__.py ├── securitytrails.py ├── viewdns.py └── whoisxmlapi.py /.gitignore: -------------------------------------------------------------------------------- 1 | session.txt 2 | __pycache__/ 3 | *.py[cod] 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2023 Artur Henrique Marzano Gonzaga 4 | 5 | Permission is hereby granted, free of charge, to any person 6 | obtaining a copy of this software and associated documentation 7 | files (the "Software"), to deal in the Software without 8 | restriction, including without limitation the rights to use, 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NameScraper 2 | 3 | **NameScraper** is a Selenium scraper for public domain search tools. NameScraper uses the [Undetected Chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver) to perform queries in various platforms and collect the results. 4 | 5 | ## Disclaimer 6 | 7 | This tool was developed for personal research purposes. I am not responsible for whatever actions are performed with this tool and I will not be maintaining it extensively or improving it to bypass captchas or other complex rate-limiting controls that these third-party tools may implement in the future. 8 | 9 | 10 | ## Supported Lookups 11 | 12 | This tool can perform and collect the results for the following tools & lookups: 13 | 14 | ### SecurityTrails 15 | 16 | In order to use this module you must have a SecurityTrails account, authenticate with it and save the contents of your `SecurityTrails` session cookie to a file named `session.txt`. 
Then run the tool with the appropriate `--lookup` flag:
17 | 
18 | * DNS (`dnsrecord`)
19 | * Historical DNS (`historicaldns`)
20 | * Subdomains (`subdomains` - **default lookup**)
21 | * Reverse CNAME (`reversecname`)
22 | * Reverse NS (`reversens`)
23 | * Reverse MX (`reversemx`)
24 | 
25 | ### ViewDNS
26 | 
27 | * ASN Lookup (`asnlookup`)
28 | * Abuse Lookup (`abuselookup`)
29 | * Chinese Firewall Test (`chinesefirewall`)
30 | * DNS Record (`dnsrecord` - **default lookup**)
31 | * DNS Report (`dnsreport`)
32 | * DNSSEC (`dnssec`)
33 | * Free Email (`freeemail`)
34 | * HTTP Headers (`httpheaders`)
35 | * IP History (`iphistory`)
36 | * IP Location (`iplocation`)
37 | * Iran Firewall Test (`iranfirewall`)
38 | * Is my site down (`ismysitedown`)
39 | * MAC Lookup (`maclookup`)
40 | * Ping (`ping`)
41 | * Portscan (`portscan`)
42 | * Propagation (`propagation`)
43 | * Reverse DNS (`reversedns`)
44 | * Reverse IP (`reverseip`)
45 | * Reverse MX (`reversemx`)
46 | * Reverse NS (`reversens`)
47 | * Reverse Whois (`reversewhois`)
48 | * Spam DB lookup (`spamdblookup`)
49 | * Traceroute (`traceroute`)
50 | * Whois (`whois`)
51 | 
52 | Please note that ViewDNS's output format is not standardized - it varies a lot depending on the type of lookup - so the results are converted to text using the [html2text](https://github.com/Alir3z4/html2text) library. You'll probably have to parse the results yourself after using this tool.
53 | 
54 | ### WhoisXMLAPI
55 | 
56 | In order to use this module you must have a WhoisXMLAPI account, authenticate with it and save the contents of your `emailverification_session` session cookie to a file named `session.txt`. Then run the tool with the appropriate `--lookup` flag:
57 | 
58 | * Subdomains (`subdomains` - **default lookup**)
59 | * Reverse NS (`reversens`)
60 | * Whois (`whois`)
61 | * Whois History (`whoishistory`)
62 | * Reverse Whois (`reversewhois`)
63 | * DNS Lookup (`dnslookup`)
64 | * DNS History (`dnshistory`)
65 | * Reverse MX (`reversemx`)
66 | * IP Geolocation (`ipgeolocation`)
67 | * IP Netblocks (`ipnetblocks`)
68 | * Website Contacts (`websitecontacts`)
69 | * Website Categorization (`websitecategorization`)
70 | * Domain Availability (`domainavailability`)
71 | * Email Verification (`emailverification`)
72 | 
73 | Note that this scraper currently relies on WhoisXMLAPI's `Copy to clipboard` feature to collect the results as a JSON object, therefore you must *not use the clipboard for other purposes* while it's running.
74 | 
75 | # Prerequisites
76 | 
77 | - [Selenium](https://github.com/SeleniumHQ/Selenium)
78 | - [Undetected Chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver)
79 | 
80 | Run the following to install the Python requirements:
81 | ```bash
82 | $ pip install -r requirements.txt
83 | ```
84 | 
85 | # Usage
86 | 
87 | Run the tool by choosing a module (`securitytrails`, `viewdns` or `whoisxmlapi`) and providing a query:
88 | 
89 | ```bash
90 | $ python namescraper.py --query <query> --output results.txt <module> --lookup <lookup>
91 | ```
92 | 
93 | If an output file is not provided with `--output`, the results are simply printed to the screen. If you just want to use the default lookup for the provider, you can also omit `--lookup`:
94 | 
95 | ```bash
96 | $ python namescraper.py --query <query> <module>
97 | ```
98 | 
99 | ## Optional flags
100 | - `--queriesfile` - Look up all lines from a file in the same session instead of providing a single query.
101 | - `--timeout` - Explicit wait timeout (in seconds) used by Selenium when looking for elements in the page (default: 10).
102 | - `--headless` - Run the webdriver in headless mode.
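A few example invocations (domain and file names below are placeholders; the `securitytrails` and `whoisxmlapi` modules additionally expect a valid `session.txt`, as described above):

```bash
# Subdomain lookup on SecurityTrails (default lookup), printing results to the screen
$ python namescraper.py --query example.com securitytrails

# Reverse NS lookup on ViewDNS, appending results to a file
$ python namescraper.py --query ns1.example.com --output results.txt viewdns --lookup reversens

# Whois lookups on WhoisXMLAPI for every line of domains.txt, in headless mode
$ python namescraper.py --queriesfile domains.txt --headless whoisxmlapi --lookup whois
```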
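The scraper classes can also be driven directly from Python - this is essentially what `namescraper.py` does under the hood. A minimal sketch, assuming a valid SecurityTrails session cookie has already been saved to `session.txt` (the domain below is a placeholder):

```python
from selenium.webdriver.support.ui import WebDriverWait
import undetected_chromedriver as uc

from scrapers.securitytrails import SecurityTrailsScraper

# Read the SecurityTrails session cookie saved beforehand
with open('session.txt') as sessionfile:
    session_cookie = sessionfile.read().rstrip()

# Start Undetected Chromedriver and set up an explicit wait
driver = uc.Chrome(use_subprocess=True)
wait = WebDriverWait(driver, 10)

# Collect subdomains for a domain; results are returned as a list
# (pass output_file=<an open file> to also write them as they are scraped)
scraper = SecurityTrailsScraper(driver, wait, session_cookie=session_cookie)
subdomains = scraper.lookup('example.com', lookup_type='subdomains')

print(f'{len(subdomains)} subdomains collected')
driver.quit()
```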
103 | 104 | # Contributing 105 | 106 | Contributions are welcome by [opening an issue](https://github.com/Macmod/NameScraper/issues/new) or by [submitting a pull request](https://github.com/Macmod/NameScraper/pulls). If you find any bugs please let me know - I don't have many test environments to validate every edge case. 107 | 108 | # Todo 109 | * Perform more tests with all supported lookups and features to identify possible bugs 110 | * Improve scraper logic to increase efficiency and avoid race conditions 111 | * Improve project structure and instructions 112 | * Improve error handling 113 | * Support for explicit authentication credentials with automatic reauthentication when the session expires 114 | * SecurityTrails - Improve stability of simple DNS lookups (sometimes it works, sometimes it doesn't...) 115 | * SecurityTrails - Paginate historical DNS lookups 116 | * WhoisXMLAPI - Use an alternate method of getting results in JSON without relying on the clipboard 117 | * WhoisXMLAPI - Stop collection if the max limit for public queries is reached 118 | 119 | # License 120 | The MIT License (MIT) 121 | 122 | Copyright (c) 2023 Artur Henrique Marzano Gonzaga 123 | 124 | Permission is hereby granted, free of charge, to any person 125 | obtaining a copy of this software and associated documentation 126 | files (the "Software"), to deal in the Software without 127 | restriction, including without limitation the rights to use, 128 | copy, modify, merge, publish, distribute, sublicense, and/or sell 129 | copies of the Software, and to permit persons to whom the 130 | Software is furnished to do so, subject to the following 131 | conditions: 132 | 133 | The above copyright notice and this permission notice shall be 134 | included in all copies or substantial portions of the Software. 135 | 136 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 137 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 138 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 139 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 140 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 141 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 142 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 143 | OTHER DEALINGS IN THE SOFTWARE. 
144 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Macmod/NameScraper/6c56522ae73ff9464724d10f477b87278d67d529/__init__.py
--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Macmod/NameScraper/6c56522ae73ff9464724d10f477b87278d67d529/core/__init__.py
--------------------------------------------------------------------------------
/namescraper.py:
--------------------------------------------------------------------------------
1 | from scrapers.viewdns import ViewDNSScraper
2 | from scrapers.securitytrails import SecurityTrailsScraper
3 | from scrapers.whoisxmlapi import WhoisXMLAPIScraper
4 | from selenium.webdriver.support.ui import WebDriverWait
5 | import undetected_chromedriver as uc
6 | import argparse
7 | import sys
8 | 
9 | SUPPORTED_LOOKUPS = {
10 |     'securitytrails': SecurityTrailsScraper.LOOKUP_MAP,
11 |     'viewdns': ViewDNSScraper.LOOKUP_MAP,
12 |     'whoisxmlapi': WhoisXMLAPIScraper.LOOKUP_MAP
13 | }
14 | 
15 | if __name__ == '__main__':
16 |     parser = argparse.ArgumentParser(prog='NameScraper', description='Selenium scraper for public domain search tools.')
17 | 
18 |     parser.add_argument('--timeout', default=10, type=int,
19 |                         help='Default timeout to use in Selenium when looking for elements in the page.')
20 |     parser.add_argument('--output', help='Output results to a file.')
21 |     parser.add_argument('--queriesfile',
22 |                         help='A file with lines to be looked up.')
23 |     parser.add_argument('--query', help='The text to query.')
24 |     parser.add_argument('--headless', action='store_true',
25 |                         help='Run the webdriver in headless mode.')
26 |     parser.add_argument('--sessionfile', default='session.txt',
27 |                         help='File with the session cookie for the selected module.')
28 | 
29 |     subparsers = parser.add_subparsers(dest='module')
30 | 
31 |     parser_sectrails = subparsers.add_parser('securitytrails', help='SecurityTrails scraper.')
32 |     parser_sectrails.add_argument('--lookup', default='subdomains',
33 |                                   choices=SUPPORTED_LOOKUPS['securitytrails'],
34 |                                   help='Type of the lookup to be performed.')
35 | 
36 |     parser_viewdns = subparsers.add_parser('viewdns', help='ViewDNS scraper.')
37 |     parser_viewdns.add_argument('--lookup', default='dnsrecord',
38 |                                 choices=SUPPORTED_LOOKUPS['viewdns'],
39 |                                 help='Type of the lookup to be performed.')
40 | 
41 |     parser_whoisxmlapi = subparsers.add_parser('whoisxmlapi', help='WhoisXMLAPI scraper.')
42 |     parser_whoisxmlapi.add_argument('--lookup', default='subdomains',
43 |                                     choices=SUPPORTED_LOOKUPS['whoisxmlapi'],
44 |                                     help='Type of the lookup to be performed.')
45 | 
46 |     args = parser.parse_args()
47 | 
48 |     specific_options = {}
49 |     if args.module == 'securitytrails':
50 |         module = 'SecurityTrails'
51 |         scraper_class = SecurityTrailsScraper
52 | 
53 |         with open(args.sessionfile) as sessionfile:
54 |             session_cookie = sessionfile.read().rstrip()
55 | 
56 |         specific_options['session_cookie'] = session_cookie
57 |     elif args.module == 'viewdns':
58 |         module = 'ViewDNS'
59 |         scraper_class = ViewDNSScraper
60 |     elif args.module == 'whoisxmlapi':
61 |         module = 'WhoisXMLAPI'
62 |         scraper_class = WhoisXMLAPIScraper
63 | 
64 |         with open(args.sessionfile) as sessionfile:
65 |             session_cookie = sessionfile.read().rstrip()
66 | 
67 |         specific_options['session_cookie'] = session_cookie
68 |     else:
69 |         print('Unknown 
module selected. Please select one of the available modules (check --help).') 70 | sys.exit(1) 71 | 72 | print(f'[+] Selected module: "{module}"') 73 | lookup = args.lookup 74 | output_filepath = args.output 75 | 76 | # Initialize configs 77 | if output_filepath is not None: 78 | output_file = open(output_filepath, 'a+', encoding='utf-8') 79 | print(f'[+] Saving results to file "{output_filepath}"') 80 | else: 81 | output_file = None 82 | 83 | if not args.queriesfile and not args.query: 84 | print('[-] You must specify at least one of --queriesfile or --query to run the tool.') 85 | sys.exit(1) 86 | 87 | if args.queriesfile: 88 | with open(args.queriesfile) as queries_file: 89 | queries = list(map(lambda x: x.rstrip(), queries_file.readlines())) 90 | else: 91 | queries = [args.query] 92 | 93 | # Initialize Undetected Chrome with provided options 94 | options = uc.ChromeOptions() 95 | if args.headless: 96 | options.add_argument('--headless') 97 | 98 | driver = uc.Chrome(use_subprocess=True, options=options) 99 | driver_wait = WebDriverWait(driver, args.timeout) 100 | 101 | # Initialize scraper object from selected module's class 102 | scraper = scraper_class( 103 | driver, driver_wait, 104 | output_file=output_file, 105 | **specific_options 106 | ) 107 | 108 | # Perform queries 109 | for query in queries: 110 | print(f'[+] Looking up "{query}" ({lookup})') 111 | scraper.lookup(query, lookup_type=lookup) 112 | 113 | if output_file is not None: 114 | output_file.close() 115 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | html2text==2020.1.16 2 | selenium 3 | undetected_chromedriver -------------------------------------------------------------------------------- /scrapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Macmod/NameScraper/6c56522ae73ff9464724d10f477b87278d67d529/scrapers/__init__.py -------------------------------------------------------------------------------- /scrapers/securitytrails.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.support import expected_conditions as ec 2 | from selenium.webdriver.common.by import By 3 | from json import dumps 4 | import re 5 | 6 | 7 | class SecurityTrailsScraper(): 8 | LOOKUP_MAP = { 9 | 'subdomains': 'https://securitytrails.com/list/apex_domain/%s', 10 | 'reversens': 'https://securitytrails.com/list/ns/%s', 11 | 'reversecname': 'https://securitytrails.com/list/cname/%s', 12 | 'reversemx': 'https://securitytrails.com/list/mx/%s', 13 | 'dnsrecord': 'https://securitytrails.com/domain/%s/dns', 14 | 'historicaldns': 'https://securitytrails.com/domain/%s/history/%s' 15 | } 16 | NEXT_BTN_XPATH = './/*[contains(@class, "tooltip")]//li//a[text()="›"]' 17 | PAGINATION_REGEX = r'- ([\d,KBM+]+) of ([\d,KBM+]+) results' 18 | 19 | def __init__(self, driver, driver_wait, 20 | session_cookie='', output_file=None): 21 | self.driver = driver 22 | self.wait = driver_wait 23 | 24 | self.driver.get('https://securitytrails.com/') 25 | self.driver.maximize_window() 26 | self.driver.add_cookie({ 27 | 'name': 'SecurityTrails', 28 | 'value': session_cookie 29 | }) 30 | 31 | self.output_file = output_file 32 | 33 | def __extract_pagination(self): 34 | try: 35 | pagination_obj = self.wait.until( 36 | ec.presence_of_element_located( 37 | (By.CLASS_NAME, 'pagination-details') 38 | ) 39 | ) 
40 |             pagination_text = pagination_obj.text.replace('\n', ' ')
41 |             pagination_text = pagination_text.replace('\r', ' ')
42 |             pagination_numbers = re.search(
43 |                 SecurityTrailsScraper.PAGINATION_REGEX,
44 |                 pagination_text
45 |             ).groups()
46 | 
47 |             end_of_page = pagination_numbers[0]
48 |             end_of_results = pagination_numbers[1]
49 |         except Exception:
50 |             end_of_page = 0
51 |             end_of_results = 0
52 | 
53 |         return end_of_page, end_of_results
54 | 
55 |     def __selenium_scraper_domains(self, domain, lookup_type):
56 |         domains = []
57 | 
58 |         self.driver.get(
59 |             SecurityTrailsScraper.LOOKUP_MAP[lookup_type] % domain
60 |         )
61 | 
62 |         end_of_page, end_of_results = (100, -1)
63 |         while end_of_page != end_of_results:
64 |             sample = self.wait.until(
65 |                 ec.presence_of_all_elements_located(
66 |                     (By.CSS_SELECTOR, "tbody>tr a")
67 |                 )
68 |             )
69 | 
70 |             try:
71 |                 page_domains = [el.text for el in sample]
72 |             except Exception:
73 |                 page_domains = []  # avoid reusing stale results if extraction fails
74 | 
75 |             if self.output_file is not None:
76 |                 self.output_file.write('\n'.join(page_domains) + '\n')
77 |             else:
78 |                 print('\n'.join(page_domains))
79 | 
80 |             domains += page_domains
81 | 
82 |             end_of_page, end_of_results = self.__extract_pagination()
83 | 
84 |             try:
85 |                 next_page_btn = self.wait.until(
86 |                     ec.presence_of_element_located(
87 |                         (By.XPATH, SecurityTrailsScraper.NEXT_BTN_XPATH)
88 |                     )
89 |                 )
90 |                 next_page_btn.click()
91 |             except Exception:
92 |                 print('[~] Could not find next page. Aborting...')
93 |                 break
94 | 
95 |         return domains
96 | 
97 |     def __selenium_scraper_dns(self, domain):
98 |         results = {}
99 | 
100 |         self.driver.get(
101 |             SecurityTrailsScraper.LOOKUP_MAP['dnsrecord'] % domain
102 |         )
103 | 
104 |         grid_divs = self.wait.until(
105 |             ec.presence_of_all_elements_located(
106 |                 (
107 |                     By.CSS_SELECTOR, "#app-content>.grid>div"
108 |                 )
109 |             )
110 |         )
111 | 
112 |         for grid_div in grid_divs:
113 |             inner_divs = grid_div.find_elements(By.CSS_SELECTOR, "div")
114 |             info_type = inner_divs[0].text
115 | 
116 |             if info_type.endswith('records'):
117 |                 records_els = inner_divs[1].find_elements(
118 |                     By.CSS_SELECTOR,
119 |                     'a.link'
120 |                 )
121 |                 records = [r.text for r in records_els]
122 | 
123 |                 normalized_info_type = info_type.replace(' records', '')
124 | 
125 |                 results[normalized_info_type] = records
126 |             elif info_type == 'TXT':
127 |                 records_els = inner_divs[1].find_elements(
128 |                     By.CSS_SELECTOR, 'span'
129 |                 )
130 |                 records = [r.text for r in records_els]
131 |                 results['TXT'] = records
132 |             else:
133 |                 continue
134 | 
135 |         if self.output_file is not None:
136 |             self.output_file.write(dumps(results) + '\n')
137 |         else:
138 |             print(dumps(results))
139 | 
140 |         return results
141 | 
142 |     def __selenium_scraper_historical_dns(self, domain):
143 |         results = []
144 |         record_types = ['a', 'aaaa', 'mx', 'ns', 'soa', 'txt']
145 | 
146 |         for rtype in record_types:
147 |             subresults = []
148 | 
149 |             self.driver.get(
150 |                 SecurityTrailsScraper.LOOKUP_MAP['historicaldns'] % (domain, rtype)
151 |             )
152 | 
153 |             table = self.wait.until(
154 |                 ec.presence_of_element_located(
155 |                     (By.CLASS_NAME, 'ui-table')
156 |                 )
157 |             )
158 |             keys_els = table.find_elements(By.CSS_SELECTOR, 'thead th')
159 |             keys = [k.text for k in keys_els]
160 | 
161 |             rows_els = table.find_elements(By.CSS_SELECTOR, 'tbody tr')
162 |             for row_el in rows_els:
163 |                 cols_els = row_el.find_elements(By.CSS_SELECTOR, 'td')
164 | 
165 |                 cols = {}
166 |                 for x in range(len(keys)):
167 |                     col_value = cols_els[x].text
168 |                     if '\n' in col_value:
169 |                         col_value = col_value.split('\n')
170 |                     cols[keys[x]] = col_value
171 | 172 | subresults.append(cols) 173 | 174 | result_obj = {'type': rtype, 'results': subresults} 175 | if self.output_file is not None: 176 | self.output_file.write(dumps(result_obj) + '\n') 177 | else: 178 | print(dumps(result_obj)) 179 | results.append(result_obj) 180 | 181 | return results 182 | 183 | def lookup(self, domain, lookup_type='subdomains'): 184 | if lookup_type == 'dnsrecord': 185 | scraper = self.__selenium_scraper_dns 186 | elif lookup_type == 'historicaldns': 187 | scraper = self.__selenium_scraper_historical_dns 188 | else: 189 | def generic_domain_scraper(domain): 190 | return self.__selenium_scraper_domains(domain, lookup_type) 191 | 192 | scraper = generic_domain_scraper 193 | 194 | return scraper(domain) 195 | -------------------------------------------------------------------------------- /scrapers/viewdns.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.support import expected_conditions as ec 2 | from selenium.webdriver.common.by import By 3 | import html2text 4 | 5 | 6 | class ViewDNSScraper(): 7 | LOOKUP_MAP = { 8 | 'reverseip': 'https://viewdns.info/reverseip/?host=%s&t=1', 9 | 'reversewhois': 'https://viewdns.info/reversewhois/?q=%s', 10 | 'iphistory': 'https://viewdns.info/iphistory/?domain=%s', 11 | 'dnsreport': 'https://viewdns.info/dnsreport/?domain=%s', 12 | 'reversemx': 'https://viewdns.info/reversemx/?mx=%s', 13 | 'reversens': 'https://viewdns.info/reversens/?ns=%s', 14 | 'iplocation': 'https://viewdns.info/iplocation/?ip=%s', 15 | 'chinesefirewall': 'https://viewdns.info/chinesefirewall/?domain=%s', 16 | 'propagation': 'https://viewdns.info/propagation/?domain=%s', 17 | 'ismysitedown': 'https://viewdns.info/ismysitedown/?domain=%s', 18 | 'iranfirewall': 'https://viewdns.info/iranfirewall/?domain=%s', 19 | 'whois': 'https://viewdns.info/whois/?domain=%s', 20 | 'httpheaders': 'https://viewdns.info/httpheaders/?domain=%s', 21 | 'dnsrecord': 'https://viewdns.info/dnsrecord/?domain=%s', 22 | 'portscan': 'https://viewdns.info/portscan/?host=%s', 23 | 'traceroute': 'https://viewdns.info/traceroute/?host=%s', 24 | 'spamdblookup': 'https://viewdns.info/spamdblookup/?ip=%s', 25 | 'reversedns': 'https://viewdns.info/reversedns/?ip=%s', 26 | 'asnlookup': 'https://viewdns.info/asnlookup/?asn=%s', 27 | 'ping': 'https://viewdns.info/ping/?domain=%s', 28 | 'dnssec': 'https://viewdns.info/dnssec/?domain=%s', 29 | 'abuselookup': 'https://viewdns.info/abuselookup/?domain=%s', 30 | 'maclookup': 'https://viewdns.info/maclookup/?mac=%s', 31 | 'freeemail': 'https://viewdns.info/freeemail/?domain=%s' 32 | } 33 | 34 | def __init__(self, driver, driver_wait, output_file=None, html=False): 35 | self.wait = driver_wait 36 | self.driver = driver 37 | 38 | self.driver.get('https://viewdns.info/') 39 | self.driver.maximize_window() 40 | 41 | self.output_file = output_file 42 | 43 | self.text_maker = html2text.HTML2Text() 44 | self.text_maker.images_to_alt = True 45 | self.text_maker.ignore_tables = False 46 | self.text_maker.bypass_tables = False 47 | self.text_maker.ignore_links = True 48 | 49 | self.html = html 50 | 51 | def __selenium_scraper_generic(self, lookup_url): 52 | self.driver.get(lookup_url) 53 | 54 | results_obj = self.wait.until( 55 | ec.presence_of_element_located( 56 | ( 57 | By.CSS_SELECTOR, 58 | 'tbody td>font[face="Courier"]' 59 | ) 60 | ) 61 | ) 62 | 63 | results = None 64 | if results_obj: 65 | html = results_obj.get_attribute('innerHTML') 66 | if self.html: 67 | results = html 68 | else: 69 | 
results = self.text_maker.handle(html) 70 | 71 | if self.output_file is not None: 72 | self.output_file.write(results + '\n') 73 | else: 74 | print(results) 75 | 76 | return results 77 | 78 | def lookup(self, query, lookup_type='dnsrecord'): 79 | result = self.__selenium_scraper_generic( 80 | ViewDNSScraper.LOOKUP_MAP[lookup_type] % query 81 | ) 82 | 83 | return result 84 | -------------------------------------------------------------------------------- /scrapers/whoisxmlapi.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.support import expected_conditions as ec 2 | from selenium.webdriver.common.by import By 3 | from selenium.webdriver.common.keys import Keys 4 | from re import search 5 | from json import loads, dumps 6 | import html2text 7 | import tkinter as tk 8 | 9 | 10 | class WhoisXMLAPIScraper(): 11 | LOOKUP_MAP = { 12 | 'subdomains': 'https://subdomains.whoisxmlapi.com/lookup', 13 | 'reversens': 'https://reverse-ns.whoisxmlapi.com/lookup', 14 | 'whois': 'https://whois.whoisxmlapi.com/lookup', 15 | 'whoishistory': 'https://whois-history.whoisxmlapi.com/lookup', 16 | 'reversewhois': 'https://reverse-whois.whoisxmlapi.com/lookup', 17 | 'dnslookup': 'https://dns-lookup.whoisxmlapi.com/lookup', 18 | 'dnshistory': 'https://dns-history.whoisxmlapi.com/lookup', 19 | 'reversemx': 'https://reverse-mx.whoisxmlapi.com/lookup', 20 | 'ipgeolocation': 'https://ip-geolocation.whoisxmlapi.com/lookup', 21 | 'ipnetblocks': 'https://ip-netblocks.whoisxmlapi.com/lookup', 22 | 'websitecontacts': 'https://website-contacts.whoisxmlapi.com/lookup', 23 | 'websitecategorization': 'https://website-categorization.whoisxmlapi.com/lookup', 24 | 'domainavailability': 'https://domain-availability.whoisxmlapi.com/lookup', 25 | 'emailverification': 'https://emailverification.whoisxmlapi.com/lookup' 26 | } 27 | NEXT_BTN_XPATH = '//*[contains(@class, "common-colored-link") and contains(text(), "Next ")]' 28 | 29 | def __init__(self, driver, driver_wait, 30 | session_cookie='', 31 | output_file=None, html=False): 32 | self.driver = driver 33 | self.driver.get('https://www.whoisxmlapi.com/') 34 | self.driver.maximize_window() 35 | self.driver.add_cookie({ 36 | 'name': 'emailverification_session', 37 | 'value': session_cookie, 38 | 'domain': '.whoisxmlapi.com' 39 | }) 40 | 41 | self.wait = driver_wait 42 | self.output_file = output_file 43 | 44 | self.text_maker = html2text.HTML2Text() 45 | self.text_maker.images_to_alt = True 46 | self.text_maker.ignore_tables = False 47 | self.text_maker.bypass_tables = False 48 | self.text_maker.ignore_links = True 49 | 50 | self.html = html 51 | 52 | def __selenium_scraper_generic(self, lookup_url, query): 53 | results = [] 54 | self.driver.get(lookup_url) 55 | 56 | input_obj = self.wait.until( 57 | ec.presence_of_element_located( 58 | ( 59 | By.CSS_SELECTOR, 60 | 'input[name="search"]' 61 | ) 62 | ) 63 | ) 64 | 65 | input_obj.clear() 66 | input_obj.send_keys(Keys.CONTROL + "a") 67 | input_obj.send_keys(query); 68 | 69 | submit_btn_obj = self.wait.until( 70 | ec.presence_of_element_located( 71 | ( 72 | By.CSS_SELECTOR, 73 | 'button[type="submit"]' 74 | ) 75 | ) 76 | ) 77 | 78 | submit_btn_obj.click() 79 | 80 | next_page = True 81 | while next_page: 82 | try: 83 | copy_to_clipboard_btn = self.wait.until( 84 | ec.presence_of_all_elements_located( 85 | ( 86 | By.CSS_SELECTOR, 87 | '.lookup-icons span' 88 | ) 89 | ) 90 | ) 91 | copy_to_clipboard_btn[0].click() 92 | except Exception: 93 | continue 94 | 95 | result = 
loads(tk.Tk().clipboard_get()) 96 | 97 | if self.output_file is not None: 98 | self.output_file.write(dumps(result) + '\n') 99 | else: 100 | print(dumps(result)) 101 | 102 | results.append(result) 103 | 104 | try: 105 | link = self.wait.until( 106 | ec.presence_of_element_located( 107 | ( 108 | By.XPATH, 109 | WhoisXMLAPIScraper.NEXT_BTN_XPATH 110 | ) 111 | ) 112 | ) 113 | 114 | link.click() 115 | except Exception: 116 | print('[~] Could not find next page. Aborting...') 117 | next_page = False 118 | 119 | return results 120 | 121 | def lookup(self, query, lookup_type='subdomains'): 122 | result = self.__selenium_scraper_generic( 123 | WhoisXMLAPIScraper.LOOKUP_MAP[lookup_type], query 124 | ) 125 | 126 | return result 127 | --------------------------------------------------------------------------------