├── LICENSE
├── README.md
├── cms.py
├── domain_name.py
├── extractPII.py
├── general.py
├── install.sh
├── ip_addr.py
├── main.py
├── nikto.py
├── nmap.py
├── nslookup.py
├── requirements.txt
├── robots_txt.py
├── traceroute.py
├── webcrawler.py
└── whois.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Akshay7591
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Web-Scanner
2 | Installation and features: https://ambhalerao12.medium.com/web-scanner-an-automated-footprinting-tool-d5734eeedbcf
3 | Subdomain enumeration of the target domain using a dictionary attack (listing the subdomains of the domain being scanned) and Docker support will be added soon! A rough sketch of the dictionary approach follows below.
4 |
5 |
6 |
--------------------------------------------------------------------------------
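The README above describes dictionary-based subdomain enumeration as a planned feature; it is not implemented anywhere in this repository. Below is a minimal, hedged sketch of how such a check could look, assuming a tiny inline wordlist (a real scan would read thousands of candidates from a wordlist file). The module name subdomain_enum.py and the function enumerate_subdomains are illustrative only.

# subdomain_enum.py - illustrative sketch, not part of the repository
import socket

def enumerate_subdomains(domain, wordlist):
    """Return the candidate subdomains that resolve to an A record."""
    found = []
    for word in wordlist:
        candidate = f"{word}.{domain}"
        try:
            # gethostbyname raises socket.gaierror when the name does not resolve
            socket.gethostbyname(candidate)
            found.append(candidate)
        except socket.gaierror:
            continue
    return found

if __name__ == "__main__":
    # Tiny illustrative wordlist; swap in a real wordlist file for an actual scan
    words = ["www", "mail", "ftp", "blog", "dev"]
    for sub in enumerate_subdomains("example.com", words):
        print(sub)

A production version would also want a timeout per lookup and some basic rate limiting so the scan does not hammer the target's DNS.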
/cms.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlopen
2 |
3 |
4 | def get_cms(url):
5 |     # Make sure the URL ends with a slash before appending robots.txt
6 |     if url.endswith('/'):
7 |         path = url
8 |     else:
9 |         path = url + '/'
10 |     print('-------------------------------------')
11 |     print("Detecting CMS used.....")
12 |     try:
13 |         html = urlopen(path + "robots.txt")
14 |         # Read the response once; a second read() would return an empty string
15 |         content = html.read().decode('utf-8')
16 |         if '/wp-admin/' in content:
17 |             print("WordPress")
18 |         elif '/_api/*' in content:
19 |             print("Wix")
20 |         elif 'Joomla' in content:
21 |             print("Joomla")
22 |         else:
23 |             print("Cannot identify the CMS used")
24 |         return content
25 |     except Exception:
26 |         print("Cannot identify the CMS used")
27 |
--------------------------------------------------------------------------------
/domain_name.py:
--------------------------------------------------------------------------------
1 | from tld import get_fld
2 |
3 | def get_domain_name(url):
4 |     # get_fld() from the tld package returns the registered domain, e.g. "example.com"
5 |     domain_name = get_fld(url)
6 |     print('-------------------------------------')
7 |     print("Domain Name is: " + domain_name)
8 |     return domain_name
9 |
--------------------------------------------------------------------------------
/extractPII.py:
--------------------------------------------------------------------------------
1 | import re
2 | from requests_html import HTMLSession
3 | import ssl
4 |
5 |
6 | def getInfo(url):
7 |     global emails, numbers
8 |     session = HTMLSession()
9 |     # Disable certificate verification so scans of sites with bad certs do not abort
10 |     ssl._create_default_https_context = ssl._create_unverified_context
11 |     html = session.get(url)
12 |     html_source_code = html.text
13 |     numbers = re.findall(r"tel:[0-9]{10}", html_source_code)
14 |     emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", html_source_code)
15 |     print('-------------------------------------')
16 |     print("Emails found on this website-")
17 |     for email in emails:
18 |         print(email)
19 |     print('-------------------------------------')
20 |     print("Phone numbers found on this website-")
21 |     for number in numbers:
22 |         print(number)
23 |     return 1
24 |
--------------------------------------------------------------------------------
/general.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def create_dir(directory):
4 |     if not os.path.exists(directory):
5 |         os.makedirs(directory)
6 |
7 | def write_file(path, data):
8 |     # Use a context manager so the file is always closed
9 |     with open(path, 'w') as f:
10 |         f.write(data)
11 |
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo 'Installing Web Scanner....'
3 | sudo apt-get update -y
4 | sudo apt-get install python3-pip -y
5 | python3 -m pip install -r requirements.txt
6 | sudo apt-get install nikto -y
7 | sudo apt-get install nmap -y
8 | sudo apt-get install whois -y
9 | sudo apt-get install traceroute -y
10 | sudo apt-get install dnsutils -y
11 |
--------------------------------------------------------------------------------
/ip_addr.py:
--------------------------------------------------------------------------------
1 |
2 | import socket
3 |
4 |
5 | def get_ip_address(url):
6 |     results = socket.gethostbyname(url)
7 |     print('------------------------------------')
8 |     print("IP Address is: " + results)
9 |     return results
10 |
11 |
12 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from general import *
2 | from domain_name import *
3 | from whois import *
4 | from robots_txt import *
5 | from nmap import *
6 | from ip_addr import *
7 | from traceroute import *
8 | from extractPII import *
9 | from webcrawler import *
10 | from nslookup import *
11 | from cms import *
12 | from nikto import *
13 | import os
14 | from art import *
15 |
16 | root_dir = 'Scanned_Websites'
17 | os.system('clear')
18 | create_dir(root_dir)
19 |
20 | def gather_info(name, url):
21 |     domain_name = get_domain_name(url)
22 |     ip_addr = get_ip_address(domain_name)
23 |     nmap = get_nmap('-F', ip_addr)
24 |     robots_txt = get_robots_txt(url)
25 |     get_cms(url)
26 |     whois = get_whois(domain_name)
27 |     print('------------------------')
28 |     print('Whois Scan of the given URL-')
29 |     print('\n')
30 |     print(whois)
31 |     tracert(domain_name)
32 |     getLinks(url)
33 |     getInfo(url)
34 |     getDNSInfo(domain_name)
35 |     get_nikto('-h', ip_addr)
36 |     print('------------------------')
37 |     # Persist the collected results so each scan leaves a report on disk
38 |     create_report(name, url, domain_name, nmap, robots_txt, ip_addr)
39 |
40 | def create_report(name, full_url, domain_name, nmap, robots_txt, ip_addr):
41 |     project_dir = root_dir + '/' + name
42 |     create_dir(project_dir)
43 |     write_file(project_dir + '/full_url.txt', full_url)
44 |     write_file(project_dir + '/domain_name.txt', domain_name)
45 |     write_file(project_dir + '/nmap.txt', nmap)
46 |     write_file(project_dir + '/robots.txt', robots_txt)
47 |     write_file(project_dir + '/ip_addr.txt', ip_addr)
48 |
49 | tprint('Web Scanner')
50 | a = str(input('Enter project name:'))
51 | print("Enter site in the following format: http://www.")
52 | print("May not work for https websites")
53 | x = str(input('Enter site to scan:'))
54 | gather_info(a, x)
55 |
--------------------------------------------------------------------------------
/nikto.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def get_nikto(options, ip):
4 |     print('-------------------------------------')
5 |     x = str(input("Would you like to perform a Nikto Scan (y/n)? "))
6 |     if x == "y":
7 |         print('Nikto Scan Results-')
8 |         # Use the options passed in (main.py passes '-h') instead of hard-coding them
9 |         os.system('nikto ' + options + ' ' + ip)
10 |     else:
11 |         print("Not performing nikto scan")
12 |
--------------------------------------------------------------------------------
/nmap.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def get_nmap(options, ip):
4 |     print('-------------------------------------')
5 |     print('Nmap Scan Results-')
6 |     command = "nmap " + options + " " + ip
7 |     process = os.popen(command)
8 |     results = str(process.read())
9 |     print(results)
10 |     return results
11 |
12 |
13 |
--------------------------------------------------------------------------------
/nslookup.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | def getDNSInfo(url):
4 |     print('-------------------')
5 |     print("NSLOOKUP-")
6 |     types = ["A", "NS", "CNAME", "AAAA", "MX", "PTR", "SOA"]
7 |     for record_type in types:
8 |         command = "nslookup -type=" + record_type + " " + url
9 |         process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
10 |         output, error = process.communicate()
11 |         if error:
12 |             print(error)
13 |         print(output.decode("utf-8"))
14 |     # Return the output of the last query so callers can reuse it
15 |     return output
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | art
2 | bs4
3 | requests_html
4 | tld
5 |
--------------------------------------------------------------------------------
/robots_txt.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlopen
2 |
3 |
4 | def get_robots_txt(url):
5 |     if url.endswith('/'):
6 |         path = url
7 |     else:
8 |         path = url + '/'
9 |     print('-------------------------------------')
10 |     try:
11 |         html = urlopen(path + "robots.txt")
12 |         # Read the response once; a second read() would return an empty string
13 |         content = html.read().decode('utf-8')
14 |         print('robots.txt of the given site has the following data:')
15 |         print(content)
16 |         return content
17 |     except Exception:
18 |         print("Cannot get robots.txt of the given website")
19 |         return ''
20 |
--------------------------------------------------------------------------------
/traceroute.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | def tracert(x):
4 |     print('-------------------')
5 |     print('Traceroute-')
6 |     os.system('traceroute ' + x)
7 |
--------------------------------------------------------------------------------
/webcrawler.py:
--------------------------------------------------------------------------------
1 | from requests_html import HTMLSession
2 | from bs4 import BeautifulSoup
3 | from urllib.parse import urljoin
4 |
5 | def getLinks(url):
6 |     links = []
7 |     session = HTMLSession()
8 |     response = session.get(url)
9 |     soup = BeautifulSoup(response.text, 'lxml')
10 |     for link in soup.find_all('a', href=True):
11 |         href = link['href']
12 |         # Skip in-page anchors and resolve relative paths against the base URL
13 |         if href.startswith('#'):
14 |             continue
15 |         full = urljoin(url, href)
16 |         if full.startswith('http'):
17 |             links.append(full)
18 |     print('-------------------------------------')
19 |     print("Crawling the target website.....")
20 |     print("Links present on this website-")
21 |     for link in links:
22 |         print(link)
23 |     return links
24 |
--------------------------------------------------------------------------------
/whois.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def get_whois(url):
5 |     command = "whois " + url
6 |     process = os.popen(command)
7 |     results = str(process.read())
8 |     return results
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------