├── LICENSE
├── README.md
├── cms.py
├── domain_name.py
├── extractPII.py
├── general.py
├── install.sh
├── ip_addr.py
├── main.py
├── nikto.py
├── nmap.py
├── nslookup.py
├── requirements.txt
├── robots_txt.py
├── traceroute.py
├── webcrawler.py
└── whois.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Akshay7591

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Web-Scanner
Installation and Features --> https://ambhalerao12.medium.com/web-scanner-an-automated-footprinting-tool-d5734eeedbcf
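
A minimal quick start, assuming a Debian-based system (install.sh relies on apt-get) and that the tool is run from the repository root:

```
bash install.sh   # installs the pip requirements plus nikto, nmap, whois, traceroute and dnsutils
python3 main.py   # prompts for a project name and a target site of the form http://www.<site>
```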
Subdomain enumeration on the target domain using a dictionary attack (listing all the subdomains of the domain being scanned) and Docker support will be added soon; a rough sketch of the planned enumeration is shown below.
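
A minimal sketch of how the dictionary-based enumeration could work, assuming a newline-separated wordlist file; the file name `subdomains.txt` and the function are placeholders, not part of the current code:

```python
import socket

def enumerate_subdomains(domain, wordlist='subdomains.txt'):
    """Resolve <word>.<domain> for every word in the wordlist and report the hits."""
    found = []
    with open(wordlist) as f:
        for word in f:
            candidate = word.strip() + '.' + domain
            try:
                ip = socket.gethostbyname(candidate)  # only existing subdomains resolve
                print(candidate + ' -> ' + ip)
                found.append(candidate)
            except socket.gaierror:
                pass  # name does not resolve; skip it
    return found
```

A call such as `enumerate_subdomains('example.com')` would print every wordlist entry that resolves to an IP address.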
--------------------------------------------------------------------------------
/cms.py:
--------------------------------------------------------------------------------
from urllib.request import urlopen


def get_cms(url):
    # Fingerprint the CMS from tell-tale entries in the site's robots.txt.
    if url.endswith('/'):
        path = url
    else:
        path = url + '/'
    print('-------------------------------------')
    print("Detecting CMS used.....")
    try:
        html = urlopen(path + "robots.txt")
        c = html.read().decode('utf-8')
        if '/wp-admin/' in c:
            print("WordPress")
        elif '/_api/*' in c:
            print("Wix")
        elif 'Joomla' in c:
            print("Joomla")
        else:
            print("Cannot identify CMS used")
        return c
    except Exception:
        print("Cannot identify CMS used")
--------------------------------------------------------------------------------
/domain_name.py:
--------------------------------------------------------------------------------
from tld import get_fld


def get_domain_name(url):
    domain_name = get_fld(url)
    print('-------------------------------------')
    print("Domain Name is: " + domain_name)
    return domain_name
--------------------------------------------------------------------------------
/extractPII.py:
--------------------------------------------------------------------------------
import re
import ssl
from requests_html import HTMLSession


def getInfo(url):
    # Scrape the page source for e-mail addresses and tel: phone numbers.
    session = HTMLSession()
    ssl._create_default_https_context = ssl._create_unverified_context
    html = session.get(url)
    html_source_code = html.text
    numbers = re.findall(r"tel:[0-9]{10}", html_source_code)
    emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", html_source_code)
    print('-------------------------------------')
    print("Emails found on this website-")
    for email in emails:
        print(email)
    print('-------------------------------------')
    print("Phone numbers found on this website-")
    for number in numbers:
        print(number)
    return 1
--------------------------------------------------------------------------------
/general.py:
--------------------------------------------------------------------------------
import os


def create_dir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)


def write_file(path, data):
    f = open(path, 'w')
    f.write(data)
    f.close()
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
#!/bin/bash
echo 'Installing Web Scanner....'
sudo apt-get update -y
sudo apt-get install python3-pip -y
python3 -m pip install -r requirements.txt
sudo apt-get install nikto -y
sudo apt-get install nmap -y
sudo apt-get install whois -y
sudo apt-get install traceroute -y
sudo apt-get install dnsutils -y
--------------------------------------------------------------------------------
/ip_addr.py:
--------------------------------------------------------------------------------
import socket


def get_ip_address(url):
    results = socket.gethostbyname(url)
    print('-------------------------------------')
    print("IP Address is: " + results)
    return results
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from general import *
from domain_name import *
from whois import *
from robots_txt import *
from nmap import *
from ip_addr import *
from traceroute import *
from extractPII import *
from webcrawler import *
from nslookup import *
from cms import *
from nikto import *
import os
from art import *

root_dir = 'Scanned_Websites'
os.system('clear')
create_dir(root_dir)


def gather_info(name, url):
    domain_name = get_domain_name(url)
    ip_addr = get_ip_address(domain_name)
    nmap = get_nmap('-F', ip_addr)
    robots_txt = get_robots_txt(url)
    get_cms(url)
    whois = get_whois(domain_name)
    print('------------------------')
    print('Whois Scan of the given URL-')
    print('\n')
    print(whois)
    tracert(domain_name)
    getLinks(url)
    getInfo(url)
    getDNSInfo(domain_name)
    get_nikto('-h', ip_addr)
    print('------------------------')
    # Save the collected results into the project directory.
    create_report(name, url, domain_name, nmap, robots_txt or '', ip_addr)


def create_report(name, full_url, domain_name, nmap, robots_txt, ip_addr):
    project_dir = root_dir + '/' + name
    create_dir(project_dir)
    write_file(project_dir + '/full_url.txt', full_url)
    write_file(project_dir + '/domain_name.txt', domain_name)
    write_file(project_dir + '/nmap.txt', nmap)
    write_file(project_dir + '/robots.txt', robots_txt)
    write_file(project_dir + '/ip_addr.txt', ip_addr)


tprint('Web Scanner')
a = str(input('Enter project name:'))
print("Enter site in the following format: http://www.")
print("May not work for https websites")
x = str(input('Enter site to scan:'))
gather_info(a, x)
--------------------------------------------------------------------------------
/nikto.py:
--------------------------------------------------------------------------------
import os


def get_nikto(options, ip):
    print('-------------------------------------')
    x = str(input("Would you like to perform a Nikto Scan-(y/n)?"))
    if x == "y":
        print('Nikto Scan Results-')
        os.system('nikto ' + options + ' ' + ip)
    else:
        print("Not performing nikto scan")
--------------------------------------------------------------------------------
/nmap.py:
--------------------------------------------------------------------------------
import os


def get_nmap(options, ip):
    print('-------------------------------------')
    print('Nmap Scan Results-')
    command = "nmap " + options + " " + ip
    process = os.popen(command)
    results = str(process.read())
    print(results)
    return results
--------------------------------------------------------------------------------
/nslookup.py:
--------------------------------------------------------------------------------
import subprocess


def getDNSInfo(url):
    # Query the common DNS record types for the target with nslookup.
    print('-------------------')
    print("NSLOOKUP-")
    types = ["A", "NS", "CNAME", "AAAA", "MX", "ALIAS", "PTR", "SOA"]
    for record_type in types:
        command = "nslookup -type=" + record_type + " " + url
        process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            print(error)
        print(output.decode("utf-8"))
    return output
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
art
bs4
requests_html
tld
lxml
--------------------------------------------------------------------------------
/robots_txt.py:
--------------------------------------------------------------------------------
from urllib.request import urlopen


def get_robots_txt(url):
    if url.endswith('/'):
        path = url
    else:
        path = url + '/'
    print('-------------------------------------')
    try:
        html = urlopen(path + "robots.txt")
        content = html.read().decode('utf-8')
        print('robots.txt of the following site has the following data:')
        print(content)
        return content
    except Exception:
        print("Cannot get robots.txt of the given website")
--------------------------------------------------------------------------------
/traceroute.py:
--------------------------------------------------------------------------------
import os


def tracert(x):
    print('-------------------')
    print('Traceroute-')
    os.system('traceroute ' + x)
--------------------------------------------------------------------------------
/webcrawler.py:
--------------------------------------------------------------------------------
from requests_html import HTMLSession
from bs4 import BeautifulSoup


def getLinks(url):
    # Collect absolute links from every anchor tag on the page.
    links = []
    session = HTMLSession()
    response = session.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    for link in soup.find_all('a', href=True):
        if link['href'].startswith('./'):
            link['href'] = url + link['href']
        if link['href'].startswith('/'):
            link['href'] = url + link['href']
        if link['href'].startswith('#'):
            continue
        if link['href'].startswith('http'):
            links.append(link['href'])
    print('-------------------------------------')
    print("Crawling the target website.....")
    print("Links present on this website-")
    for link in links:
        print(link)
    return links
--------------------------------------------------------------------------------
/whois.py:
--------------------------------------------------------------------------------
import os


def get_whois(url):
    command = "whois " + url
    process = os.popen(command)
    results = str(process.read())
    return results
--------------------------------------------------------------------------------