├── LICENSE
├── README.md
├── cms.py
├── domain_name.py
├── extractPII.py
├── general.py
├── install.sh
├── ip_addr.py
├── main.py
├── nikto.py
├── nmap.py
├── nslookup.py
├── requirements.txt
├── robots_txt.py
├── traceroute.py
├── webcrawler.py
└── whois.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Akshay7591
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Web-Scanner
2 | Installation and features: https://ambhalerao12.medium.com/web-scanner-an-automated-footprinting-tool-d5734eeedbcf
3 | Subdomain enumeration of the target domain using a dictionary attack (listing the subdomains of the domain being scanned) and Docker support will be added soon! A rough sketch of the dictionary approach follows below.
4 |
5 |
6 |
--------------------------------------------------------------------------------
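The README above describes dictionary-based subdomain enumeration as a planned feature; it is not implemented anywhere in this repository. Below is a minimal, hedged sketch of how such a check could look, assuming a tiny inline wordlist (a real scan would read thousands of candidates from a wordlist file). The module name subdomain_enum.py and the function enumerate_subdomains are illustrative only.

# subdomain_enum.py - illustrative sketch, not part of the repository
import socket

def enumerate_subdomains(domain, wordlist):
    """Return the candidate subdomains that resolve to an A record."""
    found = []
    for word in wordlist:
        candidate = f"{word}.{domain}"
        try:
            # gethostbyname raises socket.gaierror when the name does not resolve
            socket.gethostbyname(candidate)
            found.append(candidate)
        except socket.gaierror:
            continue
    return found

if __name__ == "__main__":
    # Tiny illustrative wordlist; swap in a real wordlist file for an actual scan
    words = ["www", "mail", "ftp", "blog", "dev"]
    for sub in enumerate_subdomains("example.com", words):
        print(sub)

A production version would also want a timeout per lookup and some basic rate limiting so the scan does not hammer the target's DNS.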
/cms.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlopen
2 |
3 |
4 | def get_cms(url):
5 |     # Make sure the URL ends with a slash before appending robots.txt
6 |     if url.endswith('/'):
7 |         path = url
8 |     else:
9 |         path = url + '/'
10 |     print('-------------------------------------')
11 |     print("Detecting CMS used.....")
12 |     try:
13 |         html = urlopen(path + "robots.txt")
14 |         # Read the response once; a second read() would return an empty string
15 |         content = html.read().decode('utf-8')
16 |         if '/wp-admin/' in content:
17 |             print("WordPress")
18 |         elif '/_api/*' in content:
19 |             print("Wix")
20 |         elif 'Joomla' in content:
21 |             print("Joomla")
22 |         else:
23 |             print("Cannot identify the CMS used")
24 |         return content
25 |     except Exception:
26 |         print("Cannot identify the CMS used")
27 |
--------------------------------------------------------------------------------
/domain_name.py:
--------------------------------------------------------------------------------
1 | from tld import get_fld
2 |
3 | def get_domain_name(url):
4 |     # get_fld() from the tld package returns the registered domain, e.g. "example.com"
5 |     domain_name = get_fld(url)
6 |     print('-------------------------------------')
7 |     print("Domain Name is: " + domain_name)
8 |     return domain_name
9 |
--------------------------------------------------------------------------------
/extractPII.py:
--------------------------------------------------------------------------------
1 | import re
2 | from requests_html import HTMLSession
3 | import ssl
4 |
5 |
6 | def getInfo(url):
7 |     global emails, numbers
8 |     session = HTMLSession()
9 |     # Disable certificate verification so scans of sites with bad certs do not abort
10 |     ssl._create_default_https_context = ssl._create_unverified_context
11 |     html = session.get(url)
12 |     html_source_code = html.text
13 |     numbers = re.findall(r"tel:[0-9]{10}", html_source_code)
14 |     emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", html_source_code)
15 |     print('-------------------------------------')
16 |     print("Emails found on this website-")
17 |     for email in emails:
18 |         print(email)
19 |     print('-------------------------------------')
20 |     print("Phone numbers found on this website-")
21 |     for number in numbers:
22 |         print(number)
23 |     return 1
24 |
--------------------------------------------------------------------------------
/general.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def create_dir(directory):
4 |     if not os.path.exists(directory):
5 |         os.makedirs(directory)
6 |
7 | def write_file(path, data):
8 |     # Use a context manager so the file is always closed
9 |     with open(path, 'w') as f:
10 |         f.write(data)
11 |
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo 'Installing Web Scanner....'
3 | sudo apt-get update -y
4 | sudo apt-get install python3-pip -y
5 | python3 -m pip install -r requirements.txt
6 | sudo apt-get install nikto -y
7 | sudo apt-get install nmap -y
8 | sudo apt-get install whois -y
9 | sudo apt-get install traceroute -y
10 | sudo apt-get install dnsutils -y
11 |
--------------------------------------------------------------------------------
/ip_addr.py:
--------------------------------------------------------------------------------
1 |
2 | import socket
3 |
4 |
5 | def get_ip_address(url):
6 |     results = socket.gethostbyname(url)
7 |     print('------------------------------------')
8 |     print("IP Address is: " + results)
9 |     return results
10 |
11 |
12 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from general import *
2 | from domain_name import *
3 | from whois import *
4 | from robots_txt import *
5 | from nmap import *
6 | from ip_addr import *
7 | from traceroute import *
8 | from extractPII import *
9 | from webcrawler import *
10 | from nslookup import *
11 | from cms import *
12 | from nikto import *
13 | import os
14 | from art import *
15 |
16 | root_dir = 'Scanned_Websites'
17 | os.system('clear')
18 | create_dir(root_dir)
19 |
20 | def gather_info(name, url):
21 |     domain_name = get_domain_name(url)
22 |     ip_addr = get_ip_address(domain_name)
23 |     nmap = get_nmap('-F', ip_addr)
24 |     robots_txt = get_robots_txt(url)
25 |     get_cms(url)
26 |     whois = get_whois(domain_name)
27 |     print('------------------------')
28 |     print('Whois Scan of the given URL-')
29 |     print('\n')
30 |     print(whois)
31 |     tracert(domain_name)
32 |     getLinks(url)
33 |     getInfo(url)
34 |     getDNSInfo(domain_name)
35 |     get_nikto('-h', ip_addr)
36 |     print('------------------------')
37 |     # Persist the collected results so each scan leaves a report on disk
38 |     create_report(name, url, domain_name, nmap, robots_txt, ip_addr)
39 |
40 | def create_report(name, full_url, domain_name, nmap, robots_txt, ip_addr):
41 |     project_dir = root_dir + '/' + name
42 |     create_dir(project_dir)
43 |     write_file(project_dir + '/full_url.txt', full_url)
44 |     write_file(project_dir + '/domain_name.txt', domain_name)
45 |     write_file(project_dir + '/nmap.txt', nmap)
46 |     write_file(project_dir + '/robots.txt', robots_txt)
47 |     write_file(project_dir + '/ip_addr.txt', ip_addr)
48 |
49 | tprint('Web Scanner')
50 | a = str(input('Enter project name:'))
51 | print("Enter site in the following format: http://www.")
52 | print("May not work for https websites")
53 | x = str(input('Enter site to scan:'))
54 | gather_info(a, x)
55 |
--------------------------------------------------------------------------------
/nikto.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def get_nikto(options, ip):
4 |     print('-------------------------------------')
5 |     x = str(input("Would you like to perform a Nikto Scan (y/n)? "))
6 |     if x == "y":
7 |         print('Nikto Scan Results-')
8 |         # Use the options passed in (main.py passes '-h') instead of hard-coding them
9 |         os.system('nikto ' + options + ' ' + ip)
10 |     else:
11 |         print("Not performing nikto scan")
12 |
--------------------------------------------------------------------------------
/nmap.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def get_nmap(options, ip):
4 |     print('-------------------------------------')
5 |     print('Nmap Scan Results-')
6 |     command = "nmap " + options + " " + ip
7 |     process = os.popen(command)
8 |     results = str(process.read())
9 |     print(results)
10 |     return results
11 |
12 |
13 |
--------------------------------------------------------------------------------
/nslookup.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | def getDNSInfo(url):
4 |     print('-------------------')
5 |     print("NSLOOKUP-")
6 |     types = ["A", "NS", "CNAME", "AAAA", "MX", "PTR", "SOA"]
7 |     for record_type in types:
8 |         command = "nslookup -type=" + record_type + " " + url
9 |         process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
10 |         output, error = process.communicate()
11 |         if error:
12 |             print(error)
13 |         print(output.decode("utf-8"))
14 |     # Return the output of the last query so callers can reuse it
15 |     return output
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | art
2 | bs4
3 | requests_html
4 | tld
5 |
--------------------------------------------------------------------------------
/robots_txt.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlopen
2 |
3 |
4 | def get_robots_txt(url):
5 |     if url.endswith('/'):
6 |         path = url
7 |     else:
8 |         path = url + '/'
9 |     print('-------------------------------------')
10 |     try:
11 |         html = urlopen(path + "robots.txt")
12 |         # Read the response once; a second read() would return an empty string
13 |         content = html.read().decode('utf-8')
14 |         print('robots.txt of the given site has the following data:')
15 |         print(content)
16 |         return content
17 |     except Exception:
18 |         print("Cannot get robots.txt of the given website")
19 |         return ''
20 |
--------------------------------------------------------------------------------
/traceroute.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def tracert(x):
4 |     print('-------------------')
5 |     print('Traceroute-')
6 |     os.system('traceroute ' + x)
7 |
--------------------------------------------------------------------------------
/webcrawler.py:
--------------------------------------------------------------------------------
1 | from requests_html import HTMLSession
2 | from bs4 import BeautifulSoup
3 | from urllib.parse import urljoin
4 |
5 | def getLinks(url):
6 |     links = []
7 |     session = HTMLSession()
8 |     response = session.get(url)
9 |     soup = BeautifulSoup(response.text, 'lxml')
10 |     for link in soup.find_all('a', href=True):
11 |         href = link['href']
12 |         # Skip in-page anchors and resolve relative paths against the base URL
13 |         if href.startswith('#'):
14 |             continue
15 |         full = urljoin(url, href)
16 |         if full.startswith('http'):
17 |             links.append(full)
18 |     print('-------------------------------------')
19 |     print("Crawling the target website.....")
20 |     print("Links present on this website-")
21 |     for link in links:
22 |         print(link)
23 |     return links
24 |
--------------------------------------------------------------------------------
/whois.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def get_whois(url):
5 |     command = "whois " + url
6 |     process = os.popen(command)
7 |     results = str(process.read())
8 |     return results
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------