├── .gitignore ├── .travis.yml ├── LICENSE ├── README.rst ├── test ├── test_breach.py ├── test_clickjack.py ├── test_crawler.py ├── test_crlf.py ├── test_csrf.py ├── test_exotic_characters.py ├── test_form.py ├── test_html_parser.py ├── test_input.py ├── test_page.py ├── test_scan_cookies.py ├── test_textarea.py ├── test_utils.py ├── test_xss.py ├── tutil.py └── web_runner.py └── webvulnscan ├── __init__.py ├── __main__.py ├── attacks ├── __init__.py ├── breach.py ├── clickjack.py ├── cookiescan.py ├── crlf.py ├── csrf.py ├── exotic_characters.py └── xss.py ├── client.py ├── compat.py ├── crawler.py ├── form.py ├── form_input.py ├── html_parser.py ├── log.py ├── options.py ├── page.py ├── request.py ├── textarea.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | .cover_html 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Vim 39 | *.swp 40 | *.swo 41 | *.swl 42 | *.swm 43 | *.swn 44 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.3" 5 | install: 6 | - pip install pep8 --use-mirrors 7 | - pip install coverage --use-mirrors 8 | script: 9 | - pep8 webvulnscan test 10 | - nosetests test --verbose --with-coverage --cover-package=webvulnscan --cover-min-percentage=70 11 | notifications: 12 | email: 13 | - phihag@phihag.de 14 | - liebig.richard@hotmail.com 15 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Richard Liebig 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | webvulnscan 3 | =========== 4 | 5 | .. image:: https://travis-ci.org/hhucn/webvulnscan.png?branch=master 6 | :target: https://travis-ci.org/hhucn/webvulnscan/builds 7 | 8 | Quickstart 9 | ---------- 10 | 11 | .. code:: sh 12 | 13 | $ git clone https://github.com/hhucn/webvulnscan.git 14 | $ cd webvulnscan 15 | $ python -m webvulnscan http://example.target/ 16 | 17 | What is it? 
18 | ----------- 19 | As the name suggests, webvulnscan is (or wants to be someday) a security scanner for web applications with the intent of automatic testing, licensed under the MIT License. It's written in Python (compatible with 2.7 and 3.3) and doesn't require any external libraries. 20 | 21 | Features 22 | -------- 23 | - Link & Form Crawling 24 | - Detection for XSS, CSRF, Breach, Clickjacking and cacheable Cookies 25 | - White- and Blacklisting of Pages 26 | - Authentication 27 | 28 | Examples 29 | -------- 30 | 31 | vulnsrv 32 | ~~~~~~~ 33 | 34 | vulnsrv_ is a sample exploitable website for educational purposes. We will use it here as an example: 35 | 36 | .. _vulnsrv: https://github.com/phihag/vulnsrv 37 | 38 | .. code:: sh 39 | 40 | $ wget https://raw.github.com/phihag/vulnsrv/master/vulnsrv.py 41 | $ python vulnsrv.py 42 | 43 | It's now running under http://localhost:8666/ on your computer. Now open a new console for running webvulnscan. Assuming that you are in your home directory and have already cloned webvulnscan... 44 | 45 | .. code:: sh 46 | 47 | $ cd webvulnscan 48 | $ python -m webvulnscan http://localhost:8666/ 49 | Vulnerability: CSRF under http://localhost:8666/csrf/send 50 | Vulnerability: XSS on http://localhost:8666/xss/?username=Benutzer%21 in parameter username 51 | 52 | You may notice that these aren't all the vulnerabilities, but webvulnscan is still a work in progress. 53 | 54 | Specific scanning 55 | ~~~~~~~~~~~~~~~~~ 56 | 57 | If you want to scan only for specific vulnerabilities (for example, only for BREACH), simply try the following: 58 | 59 | .. code:: sh 60 | 61 | $ python -m webvulnscan --breach http://localhost:8666/ 62 | 63 | Or, if you want to scan for XSS and CSRF vulnerabilities: 64 | 65 | .. code:: sh 66 | 67 | $ python -m webvulnscan --xss --csrf http://localhost:8666/ 68 | 69 | What if you want to be more specific? What if you want to test only one site? Use --no-crawl 70 | 71 | .. 
code:: sh 72 | 73 | $ python -m webvulnscan --no-crawl http://localhost:8666/ 74 | 75 | And the links will be ignored. However, forms are not. 76 | 77 | White- and Blacklisting 78 | ~~~~~~~~~~~~~~~~~~~~~~~ 79 | 80 | Sometimes a site contains links to other hosts that you want to test too. But the default whitelist contains only the host of the given link. Here's how you can add more: 81 | 82 | .. code:: sh 83 | 84 | $ python -m webvulnscan --whitelist http://ex.am.ple/ http://localhost/ 85 | 86 | However, what if you want to use authentication and there's a /logout link? If the crawler hits it, the session is lost. Simply blacklist it! 87 | 88 | .. code:: sh 89 | 90 | $ python -m webvulnscan --blacklist logout http://localhost/ 91 | 92 | And that page will never be visited. Please note that the blacklist parameter accepts regular expressions in Python's syntax. 93 | 94 | Authentication 95 | ~~~~~~~~~~~~~~~~ 96 | 97 | We have a login handler under /perform_login which wants the POST fields username and password. How can we log in? The account we want to use has the username "abc" and password "123456". The command would look like the following: 98 | 99 | .. code:: sh 100 | 101 | $ python -m webvulnscan --auth http://no.tld/perform_login --auth-data username=abc --auth-data password=123456 http://no.tld/ 102 | 103 | Yes, you have to use the --auth-data option for every field you want to send. 104 | 105 | Configuration 106 | ~~~~~~~~~~~~~ 107 | 108 | As you can see, you end up with a lot of parameters. To avoid typing so much, you can add the --write-out option and 109 | 110 | .. code:: sh 111 | 112 | $ python -m webvulnscan --write-out=example.conf http://localhost:8666/ 113 | 114 | save it to a file. If you want to rerun the test because you (think you) fixed it, simply run: 115 | 116 | .. 
code:: sh 117 | 118 | $ python -m webvulnscan -c example.conf 119 | -------------------------------------------------------------------------------- /test/test_breach.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import zlib 3 | import io 4 | import unittest 5 | 6 | import tutil 7 | import webvulnscan.attacks.breach 8 | from webvulnscan.page import Page 9 | 10 | 11 | try: 12 | from urllib.parse import unquote 13 | except ImportError: 14 | from urllib2 import unquote 15 | 16 | 17 | def _gzip_test_controller(html): 18 | def on_request(request): 19 | content = html.encode('utf-8') 20 | out_headers = {'Content-Type': 'text/html; charset=utf-8'} 21 | if 'gzip' in request.headers.get('Acccept-Encoding', 'identity'): 22 | outs = io.BytesIO() 23 | with GZipFile(outs) as gf: 24 | gf.write(content) 25 | content = outs.getvalue() 26 | out_headers['Content-Encoding'] = 'gZiP' 27 | return (200, content, out_headers) 28 | return on_request 29 | 30 | 31 | def _deflate_test_controller(html): 32 | def on_request(request): 33 | content = html.encode('utf-8') 34 | out_headers = {'Content-Type': 'text/html; charset=utf-8'} 35 | if 'deflate' in request.headers.get('Acccept-Encoding', 'identity'): 36 | content = zlib.compress(content) 37 | out_headers['Content-Encoding'] = 'deflate' 38 | return (200, content, out_headers) 39 | return on_request 40 | 41 | 42 | def _breach_vulnerable(): 43 | token = tutil.random_token(16) 44 | return { 45 | '/': _gzip_test_controller(u''' 46 | 47 | 48 |
49 | 50 | 51 |
52 | 53 | 54 | ''' % token), 55 | '/post': tutil.TokenController(token) 56 | } 57 | 58 | 59 | class BreachTest(unittest.TestCase): 60 | attack = webvulnscan.attacks.breach 61 | 62 | @tutil.webtest(False) 63 | def test_breach_static_site(): 64 | return {'/': u''} 65 | 66 | @tutil.webtest(False) 67 | def test_activated_gzip(): 68 | return { 69 | '/': _gzip_test_controller(u'') 70 | } 71 | 72 | @tutil.webtest(False) 73 | def test_no_token(): 74 | return {'/': _gzip_test_controller(u''' 75 | 76 | 77 |
78 | 79 | 80 | 81 |
82 | 83 | 84 | '''), 85 | '/search': ( 86 | 200, 87 | b'Here are your results', 88 | {'Content-Type': 'text/html; charset=utf-8'})} 89 | 90 | @tutil.webtest(True) 91 | def test_breach_vulnerable(): 92 | return _breach_vulnerable() 93 | 94 | @unittest.skip('Not yet supported') 95 | def test_breach_vulnerable_urltoken(): 96 | token = tutil.random_token(16) 97 | html = u''' 98 | 99 | 100 |
101 | 102 |
103 | 104 | 105 | ''' % token 106 | client = tutil.TestClient({ 107 | '/': _gzip_test_controller(html), 108 | '/post': tutil.TokenController(token, method='get') 109 | }) 110 | client.log.assert_count(1) 111 | 112 | @tutil.webtest(False) 113 | def test_activated_deflate(): 114 | return {'/': _deflate_test_controller(u'')} 115 | 116 | @tutil.webtest(False) 117 | def test_no_token_with_deflate(): 118 | html = u''' 119 | 120 | 121 |
122 | 123 | 124 | 125 |
126 | 127 | 128 | ''' 129 | return { 130 | '/': _deflate_test_controller(html), 131 | '/search': ( 132 | 200, 133 | b'Here are your results', 134 | {'Content-Type': 'text/html; charset=utf-8'}) 135 | } 136 | 137 | @tutil.webtest(True) 138 | def test_breach_vulnerable_with_deflate(): 139 | token = tutil.random_token(16) 140 | html = u''' 141 | 142 | 143 |
144 | 145 | 146 |
147 | 148 | 149 | ''' % token 150 | return { 151 | '/': _deflate_test_controller(html), 152 | '/post': tutil.TokenController(token), 153 | } 154 | 155 | @unittest.skip('Not yet supported') 156 | def test_breach_vulnerable_urltoken_with_deflate(): 157 | token = tutil.random_token(16) 158 | html = u''' 159 | 160 | 161 |
162 | 163 |
164 | 165 | 166 | ''' % token 167 | client = tutil.TestClient({ 168 | '/': _deflate_test_controller(html), 169 | '/post': tutil.TokenController(token, method='get') 170 | }) 171 | client.log.assert_count(1) 172 | 173 | if __name__ == '__main__': 174 | unittest.main() 175 | -------------------------------------------------------------------------------- /test/test_clickjack.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | import tutil 5 | import webvulnscan 6 | 7 | FORM_HTML = b'''this is a form: 8 |
9 | 10 |
11 | ''' 12 | 13 | 14 | class ClickjackTest(unittest.TestCase): 15 | attack = webvulnscan.attacks.clickjack 16 | 17 | @tutil.webtest(False) 18 | def test_clickjack(): 19 | return { 20 | '/': u''' 21 | Links are (supposed to be) idempotent 22 | ''', 23 | '/go': u'''Nothing here!''' 24 | } 25 | 26 | @tutil.webtest(False) 27 | def test_clickjack_get_form(): 28 | return { 29 | '/': u''' 30 |
31 | The default method is GET, so this should be fine 32 | 33 |
34 | ''' 35 | } 36 | 37 | @tutil.webtest(False) 38 | def test_clickjack_get_form_second(): 39 | return { 40 | '/': u''' 41 |
42 | Explicitly specifying GET works too 43 | 44 |
45 | ''' 46 | } 47 | 48 | @tutil.webtest(True) 49 | def test_clickjack_vulnerable_site(): 50 | return { 51 | '/': ( 52 | 200, FORM_HTML, 53 | {'Content-Type': 'text/html; charset=utf-8'}), 54 | '/delete': u'''Executed!''' 55 | } 56 | 57 | @tutil.webtest(True) 58 | def test_clickjack_vulnerable_alternative_content_type(): 59 | return { 60 | '/': ( 61 | 200, FORM_HTML, 62 | {'Content-Type': 'application/xhtml+xml; charset=utf-8'}), 63 | '/delete': u'''Executed!''' 64 | 65 | } 66 | 67 | @tutil.webtest(False) 68 | def test_clickjack_secured_site(): 69 | return { 70 | '/': ( 71 | 200, FORM_HTML, 72 | {'Content-Type': 'text/html; charset=utf-8', 73 | 'X-Frame-Options': 'DENY'}), 74 | '/delete': u'''Executed!''' 75 | } 76 | 77 | @tutil.webtest(False) 78 | def test_clickjack_sameorigin_site(): 79 | return { 80 | '/': ( 81 | 200, FORM_HTML, 82 | {'Content-Type': 'text/html; charset=utf-8', 83 | 'X-Frame-Options': 'SAMEORIGIN'}), 84 | '/delete': u'''Executed!''' 85 | } 86 | 87 | @tutil.webtest(False) 88 | def test_clickjack_allowfrom_site(): 89 | return { 90 | '/': ( 91 | 200, FORM_HTML, 92 | {'Content-Type': 'text/html; charset=utf-8', 93 | 'X-Frame-Options': 'ALLOW-FROM http://safe.example.org/'}), 94 | '/delete': u'''Executed!''' 95 | } 96 | 97 | @tutil.webtest(True) 98 | def test_invalid_header(): 99 | return { 100 | '/': ( 101 | 200, FORM_HTML, 102 | {'Content-Type': 'text/html; charset=utf-8', 103 | 'X-Frame-Options': 'None please!'}), 104 | '/delete': u'''Executed!''' 105 | } 106 | -------------------------------------------------------------------------------- /test/test_crawler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import tutil 4 | import webvulnscan.crawler 5 | 6 | 7 | class CrawlerTest(unittest.TestCase): 8 | 9 | def _assert_crawled(self, crawler, client, expected_raw): 10 | expected = set(map(client.full_url, expected_raw)) 11 | matched = set(page.url for page in crawler) 12 | 
self.assertEqual(matched, set(expected)) 13 | 14 | def test_imglink(self): 15 | client = tutil.TestClient({ 16 | u'/': ( 17 | 200, 18 | b'another page', 19 | {'Content-Type': 'text/html; charset=utf-8'}), 20 | u'/b': ( 21 | 200, 22 | b'image', 23 | {'Content-Type': 'text/html; charset=utf-8'}), 24 | u'/img': ( 25 | 200, 26 | b'[image]resolve this', 27 | {'Content-Type': 'image/png'}), 28 | }) 29 | crawler = webvulnscan.crawler.Crawler( 30 | client.ROOT_URL, tutil.ContainsEverything(), client=client) 31 | self._assert_crawled(crawler, client, [u'/', u'/b']) 32 | 33 | def test_invalid_characters(self): 34 | client = tutil.TestClient({ 35 | u'/': ( 36 | 200, 37 | b'\xfc', 38 | {'Content-Type': 'text/html; charset=utf-8'}), 39 | }) 40 | crawler = webvulnscan.crawler.Crawler( 41 | client.ROOT_URL, tutil.ContainsEverything(), client=client) 42 | 43 | list(crawler) # Crawl all pages - this should not throw an exception 44 | client.log.assert_found('0xfc') 45 | client.log.assert_count(1) 46 | 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /test/test_crlf.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import cgi 3 | import tutil 4 | import unittest 5 | 6 | from webvulnscan.page import Page 7 | from webvulnscan.utils import get_param, parse_http_headers 8 | import webvulnscan.attacks.crlf 9 | 10 | try: 11 | from urllib.parse import unquote 12 | except ImportError: 13 | from urllib2 import unquote 14 | 15 | 16 | def header_site(getparam, vulnerable): 17 | if vulnerable: 18 | encode = lambda s: s.encode('utf-8') 19 | else: 20 | # There is no defined encoding in practice, so let's just base64 21 | # all user input 22 | encode = lambda s: base64.b64encode(s.encode('utf-8')) 23 | 24 | def site(req): 25 | p = getparam(req) 26 | if p is None: 27 | p = "" 28 | html = (u'%s' % cgi.escape(p)).encode('utf-8') 29 | header_bytes = 
b'\r\n'.join([ 30 | b'Content-Type: text/html; charset=utf-8', 31 | b'Set-Cookie: url=' + encode(p) 32 | ]) 33 | headers = parse_http_headers(header_bytes) 34 | parsed_headers = {} 35 | for value, key in headers.items(): 36 | parsed_headers[value] = key 37 | return (200, html, parsed_headers) 38 | return site 39 | 40 | 41 | class CRLFAttackerTest(unittest.TestCase): 42 | attack = webvulnscan.attacks.crlf 43 | argument = "?foo=bar" 44 | 45 | @tutil.webtest(False) 46 | def test_clrf_static_site(): 47 | return { 48 | '/': lambda req: u'%s' % cgi.escape(req.url), 49 | } 50 | 51 | @tutil.webtest(True) 52 | def test_clrf_vulnerable_url_site(): 53 | return { 54 | '/': header_site(lambda req: get_param(req.url, 'foo'), True) 55 | } 56 | 57 | @tutil.webtest(False) 58 | def test_clrf_secure_url_site(): 59 | return { 60 | '/': header_site(lambda req: get_param(req.url, 'foo'), False) 61 | } 62 | 63 | @tutil.webtest(True) 64 | def test_clrf_vulnerable_post_site(): 65 | return { 66 | '/': u'''
67 |
''', 68 | '/post': header_site(lambda req: req.parameters.get('foo'), True) 69 | } 70 | 71 | @tutil.webtest(False) 72 | def test_clrf_secure_post_site(): 73 | return { 74 | '/': u'''
75 |
''', 76 | '/post': header_site(lambda req: req.parameters.get('foo'), False) 77 | } 78 | 79 | @tutil.webtest(True) 80 | def test_clrf_vulnerable_get_site(): 81 | return { 82 | '/': u'''
83 |
''', 84 | '/post': header_site(lambda req: get_param(req.url, 'foo'), True) 85 | } 86 | 87 | @tutil.webtest(False) 88 | def test_clrf_secure_get_site(): 89 | return { 90 | '/': u'''
91 |
''', 92 | '/post': header_site(lambda req: get_param(req.url, 'foo'), False) 93 | } 94 | -------------------------------------------------------------------------------- /test/test_csrf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | import tutil 5 | import webvulnscan.attacks.csrf 6 | from webvulnscan.utils import get_param 7 | 8 | 9 | def csrf_page(test_token): 10 | def sitef(req): 11 | if test_token(req): 12 | return u'Thanks, posted' 13 | else: 14 | return ( 15 | 400, 16 | b'CSRF denied', 17 | {'Content-Type': 'text/html; charset=utf-8'}) 18 | return sitef 19 | 20 | FORM_HTML = u''' 21 |
22 | 23 | 24 |
25 | ''' 26 | 27 | 28 | def test_csrf_protected_form(): 29 | token = tutil.random_token(8) 30 | return { 31 | '/': FORM_HTML % token, 32 | '/s': csrf_page(lambda req: get_param(req.url, 'text')) 33 | } 34 | 35 | 36 | def test_csrf_vulnerable_form(): 37 | token = tutil.random_token(8) 38 | return { 39 | '/': FORM_HTML % token, 40 | '/s': csrf_page(lambda req: True) 41 | } 42 | 43 | 44 | class CsrfTest(unittest.TestCase): 45 | attack = webvulnscan.attacks.csrf 46 | 47 | @tutil.webtest(False) 48 | def test_static_site(): 49 | return { 50 | '/': u'''''', 51 | } 52 | 53 | @tutil.webtest(False) 54 | def test_csrf_protected_form(): 55 | return test_csrf_protected_form() 56 | 57 | @tutil.webtest(True) 58 | def test_csrf_vulnerable_post_form(): 59 | return test_csrf_vulnerable_form() 60 | -------------------------------------------------------------------------------- /test/test_exotic_characters.py: -------------------------------------------------------------------------------- 1 | import tutil 2 | import unittest 3 | import sys 4 | 5 | import webvulnscan.attacks.exotic_characters 6 | from webvulnscan.utils import get_param 7 | 8 | try: 9 | from urllib.parse import unquote 10 | except ImportError: 11 | from urllib2 import unquote 12 | 13 | 14 | SHELL_CHARACTERS = u'"\'|;<>\0' 15 | GENERIC_FORM = u''' 16 |
17 | 18 |
19 | ''' 20 | 21 | 22 | def shell_emulation(getinput): 23 | def site(req): 24 | s = getinput(req) 25 | # A real application would run subprocess.Popen(..., shell=True) or so 26 | if any(c in s for c in SHELL_CHARACTERS): 27 | return ( 28 | 500, 29 | b'Syntax Error', 30 | {'Content-Type': 'text/html; charset=utf-8'} 31 | ) 32 | return u'Process executed.' 33 | return site 34 | 35 | 36 | class ExoticCharacterTest(unittest.TestCase): 37 | attack = webvulnscan.attacks.exotic_characters 38 | argument = '?test=a' 39 | 40 | @tutil.webtest(False) 41 | def test_exotic_characters_static_site(): 42 | return { 43 | '/': u'''''', 44 | } 45 | 46 | @tutil.webtest(True) 47 | def test_exotic_characters_url_vulnerable_site(): 48 | return { 49 | '/': shell_emulation(lambda req: get_param(req.url, 'test')), 50 | } 51 | 52 | @tutil.webtest(True) 53 | def test_exotic_characters_post_vulnerable_site(): 54 | return { 55 | '/': GENERIC_FORM, 56 | '/post': shell_emulation(lambda req: req.parameters['test']), 57 | } 58 | 59 | @tutil.webtest(False) 60 | def test_exotic_characters_valid_parsing(): 61 | return { 62 | '/': GENERIC_FORM, 63 | '/post': u'Properly escaped command', 64 | } 65 | -------------------------------------------------------------------------------- /test/test_form.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import xml.etree.ElementTree as ET 3 | 4 | import tutil 5 | import webvulnscan.form 6 | 7 | 8 | class FormTest(unittest.TestCase): 9 | def test_no_inputs_no_action(self): 10 | doc = ET.fromstring('
') 11 | form = webvulnscan.form.Form('http://test/', doc) 12 | self.assertEqual({}, dict(form.get_inputs())) 13 | self.assertEqual("http://test/", form.action) 14 | 15 | def test_no_inputs_with_Action(self): 16 | doc = ET.fromstring('
') 17 | form = webvulnscan.form.Form('http://test/', doc) 18 | self.assertEqual(doc.items(), form.document.items()) 19 | self.assertEqual(doc.keys(), form.document.keys()) 20 | self.assertEqual("http://test/test", form.action) 21 | self.assertEqual("get", form.method) 22 | 23 | def test_one_input_no_action(self): 24 | doc = '
' 25 | doc = ET.fromstring(doc) 26 | form = webvulnscan.form.Form('http://test/', doc) 27 | self.assertEqual({"test": "abcdefgh"}, 28 | dict(form.get_parameters())) 29 | self.assertEqual("http://test/", form.action) 30 | 31 | def test_one_input_with_action(self): 32 | doc = '
' \ 33 | '
' 34 | doc = ET.fromstring(doc) 35 | form = webvulnscan.form.Form('http://test/', doc) 36 | self.assertEqual({"test": "abcdefgh"}, 37 | dict(form.get_parameters())) 38 | self.assertEqual("http://test/test", form.action) 39 | 40 | def test_serveral_inputs_no_action(self): 41 | doc = '
' \ 42 | '
' 43 | doc = ET.fromstring(doc) 44 | form = webvulnscan.form.Form('http://test/', doc) 45 | self.assertEqual({"test": "abcdefgh", "click": ""}, 46 | dict(form.get_parameters())) 47 | self.assertEqual("http://test/", form.action) 48 | 49 | def test_serveral_inputs_with_action(self): 50 | doc = '
' \ 51 | '
' 52 | doc = ET.fromstring(doc) 53 | form = webvulnscan.form.Form('http://test/', doc) 54 | self.assertEqual({"test": "abcdefgh", "click": ""}, 55 | dict(form.get_parameters())) 56 | self.assertEqual("http://test/action", form.action) 57 | 58 | def test_form_with_textarea(self): 59 | doc = '
' + \ 60 | '') 11 | textarea = webvulnscan.textarea.TextArea(doc) 12 | self.assertEqual(textarea.get_name, "area") 13 | self.assertEqual(textarea.get_type, "textarea") 14 | 15 | def test_placeholder(self): 16 | doc = ET.fromstring('') 18 | textarea = webvulnscan.textarea.TextArea(doc) 19 | self.assertEqual(textarea.get_name, "area") 20 | self.assertEqual(textarea.guess_value(), "somedata") 21 | self.assertEqual(textarea.get_type, "textarea") 22 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import tutil 4 | import xml.etree.ElementTree as ET 5 | from webvulnscan import utils 6 | 7 | 8 | class SimulatedPage(object): 9 | def __init__(self, document): 10 | self.document = document 11 | 12 | 13 | class UtilsTest(unittest.TestCase): 14 | def test_change_parameter_with_query(self): 15 | link = 'http://x.yz/?other=11&val=22&yet=3' 16 | generated = utils.change_parameter(link, "val", "42") 17 | self.assertTrue('val=22' not in generated) 18 | self.assertTrue('val=42' in generated) 19 | self.assertTrue('other=11' in generated) 20 | self.assertTrue('yet=3' in generated) 21 | 22 | def test_change_parameter_no_query(self): 23 | link = 'http://x.yz/' 24 | generated = utils.change_parameter(link, "val", "42") 25 | self.assertEqual(generated, link) 26 | 27 | def test_get_url_host(self): 28 | link = 'http://random.host/test/value' 29 | self.assertEqual(utils.get_url_host(link), "random.host") 30 | 31 | def test_get_page_text_no_text(self): 32 | doc = ET.fromstring('') 33 | page = SimulatedPage(doc) 34 | self.assertEqual(list(utils.get_page_text(page)), []) 35 | 36 | def test_get_page_text_with_text(self): 37 | doc = ET.fromstring('textsubtext') 38 | page = SimulatedPage(doc) 39 | self.assertEqual(list(utils.get_page_text(page)), 40 | ['text', 'subtext']) 41 | 42 | def test_modify_parameters(self): 43 | parameters = 
{'test': 'abc', 'test2': 'cba'} 44 | new_parameters = utils.modify_parameter(parameters, 45 | 'test', 'cba') 46 | parameter_list = list(new_parameters.values()) 47 | self.assertEqual(parameter_list, ['cba', 'cba']) 48 | -------------------------------------------------------------------------------- /test/test_xss.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import cgi 3 | import sys 4 | 5 | import tutil 6 | import webvulnscan.attacks.xss 7 | 8 | try: 9 | from urllib.parse import unquote 10 | except ImportError: 11 | from urllib2 import unquote 12 | 13 | 14 | def form_client(method, echo_param): 15 | form = u''' 16 | 17 |
''' % method 18 | 19 | def xss_site(req): 20 | return u'' + echo_param(req) + u'' 21 | 22 | return { 23 | '/': form, 24 | '/send': xss_site, 25 | } 26 | 27 | 28 | class XssTest(unittest.TestCase): 29 | attack = webvulnscan.attacks.xss 30 | argument = '?test=foo' 31 | 32 | @tutil.webtest(False) 33 | def test_xss_static_site(): 34 | return { 35 | '/': u'''''', 36 | } 37 | 38 | @tutil.webtest(True) 39 | def test_xss_post_vulnerable_site(): 40 | return form_client('post', 41 | lambda req: req.parameters['text']) 42 | 43 | @tutil.webtest(False) 44 | def test_xss_post_secure_site(): 45 | return form_client('post', 46 | lambda req: cgi.escape(req.parameters['text'])) 47 | 48 | @tutil.webtest(True) 49 | def test_xss_url_vulnerable_site(): 50 | return { 51 | '/': lambda req: u'' + unquote(req.url) + '', 52 | } 53 | 54 | @tutil.webtest(False) 55 | def test_xss_url_secure_site(): 56 | return { 57 | '/': lambda req: (u'' + 58 | cgi.escape(unquote(req.url)) + ''), 59 | } 60 | -------------------------------------------------------------------------------- /test/tutil.py: -------------------------------------------------------------------------------- 1 | """ Common test setup functions """ 2 | 3 | import collections 4 | import logging 5 | import os.path 6 | import string 7 | import sys 8 | import random 9 | 10 | 11 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 12 | sys.path.append(root_dir) 13 | 14 | # If this fails, we failed to set up the correct path above 15 | import webvulnscan 16 | 17 | 18 | def random_token(length=8): 19 | return ''.join(random.choice(string.hexdigits) for _ in range(length)) 20 | 21 | 22 | class TestLog(webvulnscan.log.Log): 23 | def assert_found(self, sub): 24 | assert any(sub in e.message for e in self.entries), ( 25 | u'Expected to see "%s", but only got %r' % ( 26 | (sub, [e.message for e in self.entries]))) 27 | 28 | def assert_count(self, expected): 29 | assert len(self.entries) == expected, ( 30 | u'Expected to see %d 
log entries, but got %d in log %r' % 31 | (expected, len(self.entries), list(self.entries))) 32 | 33 | def assert_vulnerable(self, vulnerable): 34 | was_vulnerable = len(self.entries) != 0 35 | assert was_vulnerable == vulnerable 36 | 37 | 38 | # A class for writing site which are detemined 39 | # to be request by webvulnscan.Client() 40 | class ClientSite(object): 41 | def __init__(self): 42 | pass 43 | 44 | def download(self, url, parameters=None, remember_visited=None): 45 | pass 46 | 47 | def download_page(self, url, parameters=None, remember_visited=None): 48 | pass 49 | 50 | 51 | class TestClient(webvulnscan.client.Client): 52 | """ url_map is a dict whose keys are either URLs or query strings, 53 | , and whose values are one of: 54 | * tuples of (status_code, response_data, headers) 55 | * just a unicode string 56 | * a callable returning a tuple or unicode string 57 | 58 | For example, a valid url_map looks like: 59 | { 60 | u'http://localhost/': (200, b'', {}), 61 | u'/404': (404, b'Not found', {'Content-Type': 'text/html;'}), 62 | u'/req': lambda request: u'', 63 | } 64 | """ 65 | 66 | EXAMPLE_PREFIX = u'http://test.webvulnscan' 67 | 68 | def __init__(self, url_map, *args, **kwargs): 69 | super(TestClient, self).__init__(*args, log=TestLog(), **kwargs) 70 | self.url_map = dict( 71 | (self.full_url(url), content) 72 | for url, content in url_map.items() 73 | ) 74 | 75 | @property 76 | def ROOT_URL(self): 77 | return self.EXAMPLE_PREFIX + u'/' 78 | 79 | def full_url(self, url): 80 | return url if u'://' in url else self.EXAMPLE_PREFIX + url 81 | 82 | def _download(self, req): 83 | req_url = req.url.partition(u'?')[0] 84 | assert req_url in self.url_map, u'Invalid request to %r' % req_url 85 | res = self.url_map[req_url] 86 | if callable(res): 87 | headers = {} 88 | res = res(req) 89 | if isinstance(res, type(u'')): 90 | status_code = 200 91 | response_data = res.encode('utf-8') 92 | headers = {'Content-Type': 'text/html; charset=utf-8'} 93 | else: 94 | 
status_code, response_data, headers = res 95 | 96 | assert isinstance(response_data, bytes), ( 97 | u'Got invalid test response body %r' % (response_data,)) 98 | return (req, status_code, response_data, headers) 99 | 100 | def run_attack(self, attack, add_url=u''): 101 | root_page = self.download_page(self.ROOT_URL + add_url) 102 | return attack(self, self.log, root_page) 103 | 104 | 105 | def webtest(vulnerable): 106 | def wrapper(func): 107 | client = TestClient(func()) 108 | argument = "" 109 | 110 | def res_func(self): 111 | if hasattr(self, "argument"): 112 | client.run_attack(self.attack, self.argument) 113 | argument = self.argument 114 | else: 115 | client.run_attack(self.attack) 116 | client.log.assert_vulnerable(vulnerable) 117 | 118 | res_func.__name__ = func.__name__ 119 | res_func.client = client 120 | res_func.argument = argument 121 | 122 | return res_func 123 | 124 | return wrapper 125 | 126 | 127 | class ContainsEverything(object): 128 | def __contains__(self, x): 129 | return True 130 | 131 | 132 | def TokenController(value, method='post', field_name='token'): 133 | assert method in ('get', 'post') 134 | 135 | def on_request(request): 136 | parameters = request.parameters 137 | headers = request.headers 138 | url = request.url 139 | sent_value = parameters.get(field_name, u'') 140 | out_headers = {'Content-Type': 'text/html; charset=utf-8'} 141 | if value == sent_value: 142 | content = b'Done.' 143 | return (200, content, out_headers) 144 | else: 145 | content = b'Wrong token.' 
146 | return (400, content, out_headers) 147 | return on_request 148 | 149 | 150 | __all__ = ('TestLog', 'TestClient', 'TokenController', 'ContainsEverything') 151 | -------------------------------------------------------------------------------- /test/web_runner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import unicode_literals 3 | 4 | import cgi 5 | import io 6 | import os 7 | import socket 8 | import unittest 9 | import sys 10 | 11 | try: 12 | from http.server import BaseHTTPRequestHandler, HTTPServer 13 | except ImportError: 14 | from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer 15 | 16 | try: 17 | from urllib.parse import urlparse, parse_qs 18 | except ImportError: 19 | from urlparse import urlparse, parse_qs 20 | 21 | _WVS_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 22 | 23 | sys.path.append(_WVS_ROOT_DIR) 24 | import webvulnscan 25 | 26 | sitemap = {} 27 | 28 | 29 | class WebRunnerHandler(BaseHTTPRequestHandler): 30 | def _write(self, s): 31 | return self.wfile.write(s.encode('utf-8')) 32 | 33 | def _default_page(self): 34 | self.send_response(200) 35 | self.send_header("Content-Type", "text/html") 36 | self.end_headers() 37 | w = self._write 38 | w(""" 39 | 40 | 41 | 42 | webvulnscan tests 43 | 44 | 45 |

webvulnscan tests

46 | 47 | 57 | 58 | """) 59 | 60 | def _serve_request(self): 61 | parsed_path = urlparse(self.path) 62 | current_path = parsed_path.path.split('/')[1] 63 | 64 | if parsed_path.path == "/": 65 | self._default_page() 66 | elif current_path in sitemap: 67 | extended_path = "".join(parsed_path.path.split('/')[2:]) 68 | 69 | site = sitemap[current_path] 70 | client = site.client 71 | 72 | if parsed_path.query == "": 73 | url = "http://test.webvulnscan/" + extended_path 74 | else: 75 | url = "http://test.webvulnscan/" + extended_path +\ 76 | "?" + parsed_path.query 77 | 78 | request = webvulnscan.request.Request(url) 79 | 80 | if 'content-length' in self.headers: 81 | content_len = int(self.headers['content-length']) 82 | body = self.rfile.read(content_len) 83 | request.parameters = parse_qs(body) 84 | 85 | for value in request.parameters: 86 | new_value = request.parameters[value][0].decode('utf-8') 87 | request.parameters[value] = new_value 88 | 89 | _, status_code, response_data, headers = client._download(request) 90 | self.send_response(status_code) 91 | self.send_header('Content-Type', 'text/html') 92 | for header in headers: 93 | self.send_header(header[0], header[1]) 94 | self.end_headers() 95 | 96 | self.wfile.write(response_data) 97 | else: 98 | self.send_error(404, "File not Found!") 99 | 100 | def __getattr__(self, name): 101 | if name.startswith('do_'): 102 | return self._serve_request 103 | raise AttributeError() 104 | 105 | 106 | def discover(): 107 | testloader = unittest.TestLoader() 108 | suites = testloader.discover(os.path.join(_WVS_ROOT_DIR, 'test')) 109 | for suite in suites: 110 | for klass in suite: 111 | for test in klass._tests: 112 | elements = dir(test) 113 | for subklass in elements: 114 | func = getattr(test, subklass) 115 | if hasattr(func, "client"): 116 | yield func 117 | 118 | 119 | def main(): 120 | for test in discover(): 121 | sitemap[test.__name__] = test 122 | 123 | httpd = HTTPServer(("", 8000), WebRunnerHandler) 124 | 
def run(options, targets):
    """ Scan all targets with the attacks selected in options.

    options - parsed command line options (an optparse Values object).
    targets - iterable of URLs / host names; "http://" is prepended to
              entries without a scheme.

    The report is printed when scanning ends (also on errors), unless
    verbose mode already printed every entry directly.
    """
    options.whitelist = set(options.whitelist)
    options.blacklist = set(options.blacklist)

    option_values = vars(options)

    attacks = [attack for attack in all_attacks()
               if option_values[attack.__name__]]

    # No attack explicitly requested means: run all of them.
    if not attacks:
        attacks = all_attacks()

    # Build a new list instead of calling attacks.remove() while iterating
    # over attacks; removing during iteration skips the element that
    # follows each removed attack.
    attacks = [attack for attack in attacks
               if not option_values[attack.__name__ + "_except"]]

    if options.verbose:
        log = Log(verbosity='info', direct_print=True)
    elif options.vuln_only:
        log = Log(verbosity=u'vuln')
    else:
        log = Log()
    client = Client(log=log)

    if options.import_cookies:
        client.cookie_jar = MozillaCookieJar(options.import_cookies)
        client.cookie_jar.load()

    # TODO This is horrible. Remove it!
    if options.auth_url is not None and options.auth_data is not None:
        post_data = {}

        for field in options.auth_data:
            name, _, value = field.partition('=')
            post_data[name] = value

        # Client.download returns a 4-tuple; only the side effect (the
        # session cookies stored in the client) is needed here.
        client.download(options.auth_url, post_data)
    elif options.form_page and options.form_id:
        form_data = {}

        for field in options.form_data:
            name, _, value = field.partition('=')
            form_data[name] = value

        form_page = client.download_page(options.form_page)
        form = [x for x in form_page.get_forms()
                if x.document.attrib.get('id') == options.form_id][0]

        # Start with the guessed values, then apply the manual overrides.
        entries = dict(form.get_parameters())
        entries.update(form_data)

        form.send(client, entries)

    try:
        for target in targets:
            if not urlparse(target).scheme:
                target = u'http://' + target

            options.whitelist.add(get_url_host(target))

            if options.no_crawl:
                all_pages = [client.download_page(target)]
            else:
                all_pages = Crawler(target, options.whitelist, client,
                                    options.blacklist)

            for page in all_pages:
                log('info', page.url, 'crawler', 'Scanning ...')

                for attack in attacks:
                    attack(client, log, page)

    finally:
        if not options.verbose:
            log.print_report(summarize=options.do_print)
    # NOTE(review): main() tests the return value of run(), which is always
    # None here - confirm whether the log entries should be returned.
def check_for_compression(headers, field='Content-Encoding'):
    """ Return True if the header named field mentions gzip or deflate.

    headers - mapping of header names to values (needs .get()).
    field   - header to inspect; a missing header counts as 'identity'.

    Parameters such as ";q=0.5" (as found in Accept-Encoding) are ignored.
    """
    codings = set(
        coding.partition(';')[0].strip().lower()
        for coding in headers.get(field, 'identity').split(','))
    return 'gzip' in codings or 'deflate' in codings


def find_secrets(form):
    """ Return the set of (name, value) pairs of all hidden inputs of form
    whose value could_be_secret() considers a potential secret. """
    return set(
        (form_input.get_name, form_input.get_element_value)
        for form_input in form.get_inputs()
        if (form_input.get_type == "hidden"
            and could_be_secret(form_input.get_element_value)))


@attack()
def breach(client, log, target_page):
    """ Report hidden form secrets that stay constant across two downloads
    of a page that is served compressed. """
    if not check_for_compression(target_page.request.headers,
                                 'Accept-Encoding'):
        # Redownload with request for gzip
        new_request = target_page.request.copy()
        new_request.headers['Accept-Encoding'] = "deflate, gzip"
        target_page = client.download_page(new_request)

    # Without a compressed response there is nothing to report.
    if not check_for_compression(target_page.headers):
        return

    secrets = dict((form.action, find_secrets(form))
                   for form in target_page.get_forms())

    page_redownload = client.download_page(target_page.request)
    for form in page_redownload.get_forms():
        redownload_secrets = find_secrets(form)
        # A form that only shows up in the second download has no
        # previous secrets to compare against.
        previous_secrets = secrets.get(form.action, set())
        constant_secrets = previous_secrets.intersection(redownload_secrets)
        if constant_secrets:
            log('vuln', target_page.url, u'BREACH vulnerability',
                u'Secrets %r do not change during redownload'
                % dict(constant_secrets),
                request=target_page.request)
def secure_cache_control(page):
    """ Return True if the Cache-Control header of page keeps the response
    out of shared caches.

    The header is split into its comma separated directives. The
    directives no-cache, no-store and private as well as max-age=0 count
    as secure. A missing or empty header returns False.
    """
    if "Cache-Control" not in page.headers:
        return False

    for directive in page.headers["Cache-Control"].split(","):
        name, _, argument = directive.strip().lower().partition("=")
        name = name.strip()

        # no-cache may carry a field list (no-cache="Set-Cookie"); the
        # directive name alone decides.
        if name in ("no-cache", "no-store", "private"):
            return True

        # Compare the whole argument so that e.g. max-age=3600 (which
        # merely contains a "0") is not mistaken for max-age=0.
        if name == "max-age" and argument.strip().strip('"') == "0":
            return True

    return False
attack_parameters) 16 | evaluate(log, form.action, result) 17 | 18 | 19 | def attack_url(client, log, url, parameter): 20 | attack_parameters = change_parameter(url, parameter, ATTACK_SEQUENCE) 21 | result = client.download_page(attack_parameters) 22 | evaluate(log, url, result) 23 | 24 | 25 | def evaluate(log, target, result): 26 | if result.headers.get('Content-Length') == str(len(BODY)): 27 | log('vuln', target, u'CRLF Injection', request=result.request) 28 | elif result.status_code == 500: 29 | log('warn', target, u'Parameter Parsing Error', request=result.request) 30 | 31 | 32 | def search(page): 33 | for form in page.get_forms(): 34 | yield ('form', form) 35 | 36 | for parameter, _ in page.url_parameters: 37 | yield ('url', page.url, parameter) 38 | 39 | 40 | @attack(search) 41 | def crlf(client, log, target_type, *args): 42 | globals()['attack_' + target_type](client, log, *args) 43 | -------------------------------------------------------------------------------- /webvulnscan/attacks/csrf.py: -------------------------------------------------------------------------------- 1 | from ..utils import attack 2 | 3 | 4 | def fill_entries(form, filter_type=None): 5 | for form_input in form.get_inputs(): 6 | input_name = form_input.get_name 7 | input_value = form_input.guess_value() 8 | input_type = form_input.get_type 9 | 10 | if filter_type is None: 11 | yield input_name, input_value 12 | else: 13 | if input_type != filter_type: 14 | yield input_name, input_value 15 | 16 | 17 | def search(page): 18 | for form in page.get_forms(): 19 | yield (form,) 20 | 21 | 22 | @attack(search) 23 | def csrf(client, log, form): 24 | # First, we send a valid request. 25 | valid_parameters = dict(fill_entries(form)) 26 | form.send(client, valid_parameters) 27 | 28 | # Now, we suppress everything that looks like a token. 
# Single characters that are sent as parameter values to provoke parsing
# and encoding errors on the server.
SYMBOLS = {
    u'"', u"'", u'<', u'{', u'(', u')', u'}', u'>', u'&', u'|', u';',
    # A code point outside the Basic Multilingual Plane needs the
    # eight-digit \U escape; u'\u1D550' is U+1D55 followed by a "0".
    u'\U0001D550', u'\x00', u'\uFFFF'}
# Harmless value used to check that a URL parameter works at all.
DETECT_STRING = "test"
def attack_post(client, log, form):
    """ Submit form once per parameter with that parameter replaced by
    XSS_STRING and report every response that contains the string.

    client - client used to send the form.
    log    - log callable receiving warnings and vulnerabilities.
    form   - the form to attack.

    Stops at the first submission that raises an exception.
    """
    for parameter_name, _ in form.get_parameters():
        # Fresh copy of the guessed values, with only the current
        # parameter replaced by XSS_STRING.
        parameters = dict(form.get_parameters())
        parameters[parameter_name] = XSS_STRING

        # Send the form
        try:
            attacked_page = form.send(client, parameters)
        except Exception:
            log('warn', form.action,
                'HTTP Errors occurs when confronted with html input',
                "in parameter " + parameter_name)
            return

        # Determine if the string is unfiltered on the page.
        if XSS_STRING in attacked_page.html:
            log('vuln', attacked_page.url, "XSS",
                "in parameter " + parameter_name,
                request=attacked_page.request)
class Client(object):
    """ Client provides an easy interface for accessing web content. """

    def __init__(self, log=webvulnscan.log):
        # The default value is the webvulnscan.log module itself, which
        # cannot be called like a Log; replace it by a fresh Log instance.
        if not callable(log):
            log = webvulnscan.log.Log()

        self.cookie_jar = CookieJar()
        self.opener = self.setup_opener()
        self.additional_headers = {}
        self.log = log

    def setup_opener(self):
        """ Builds the opener for the class. """
        cookie_handler = HTTPCookieProcessor(self.cookie_jar)
        opener = build_opener(cookie_handler)

        return opener

    def _download(self, request):
        """ Sends request and returns
        (request, status_code, response_data, headers).

        HTTP error responses are returned like regular responses. A bad
        status line yields status code 0 with an empty body. Raises
        URLError if the target cannot be reached.
        """
        # Function-scope import: keeps this class self-contained.
        import io

        self.log('info', request.url, "request", "Trying to request")
        try:
            response = self.opener.open(request)
        except HTTPError as error:
            # HTTPError doubles as a response object.
            response = error
        except URLError as error:
            if hasattr(self.log, 'warn'):
                self.log.warn(request.url, "unreachable")
            raise URLError(request.url + ' is unreachable: {0}'.format(error))
        except BadStatusLine:
            self.log('warn', request.url, 'Bad status line sent')
            # The body must be bytes, download_page() decodes it.
            return (request, 0, b"", {})

        status_code = response.code
        headers = response.info()
        content_encoding = headers.get('Content-Encoding')

        if content_encoding == "gzip":
            # GzipFile may need seek()/tell(), which the response object
            # does not offer on every Python version; buffer it first.
            sim_file = gzip.GzipFile(fileobj=io.BytesIO(response.read()))
            response_data = sim_file.read()
        elif content_encoding == "deflate":
            response_data = zlib.decompress(response.read())
        else:
            response_data = response.read()

        return (request, status_code, response_data, headers)

    def download(self, url_or_request, parameters=None, headers=None):
        """
        Downloads a URL, returns (request, status_code, response_data, headers)

        url_or_request may be a URL or a Request; in the latter case
        parameters and headers must be None and a copy of the request is
        sent. The additional_headers of the client are added to the request.
        """

        if isinstance(url_or_request, Request):
            assert parameters is None
            assert headers is None
            request = url_or_request.copy()
        else:
            request = Request(url_or_request, parameters, headers)

        for header, value in self.additional_headers.items():
            request.add_header(header, value)

        msg = ('Requesting with parameters %s' % (request.parameters,)
               if request.parameters else
               'Requesting')
        self.log('info', request.url, 'client status', msg)

        return self._download(request)

    def download_page(self, url_or_request, parameters=None, req_headers=None):
        """ Downloads the content of a site, returns it as page.
        Throws NotAPage if the content is not a webpage.

        Bytes that cannot be decoded with the announced charset are
        replaced and a warning is logged.
        """

        request, status_code, html_bytes, headers = self.download(
            url_or_request, parameters, req_headers)

        content_type, charset = parse_content_type(
            headers.get('Content-Type'),
            logfunc=functools.partial(self.log, 'warn', request.url))

        if content_type in NOT_A_PAGE_CONTENT_TYPES:
            raise NotAPage()

        try:
            html = html_bytes.decode(charset, 'strict')
        except UnicodeDecodeError as ude:
            self.log('warn', request.url, 'Incorrect encoding', str(ude))
            html = html_bytes.decode(charset, 'replace')

        return Page(self.log, request, html, headers, status_code)
from collections import deque 5 | from re import search 6 | 7 | 8 | class Crawler(object): 9 | """ Generator which systematically searches through a site. """ 10 | def __init__(self, entry_point, whitelist, client=None, blacklist=set()): 11 | """ 12 | Parameters: 13 | entry_point - where to start the search. 14 | whitelist - which host are allowed to be crawled. 15 | client - A client object which can be used. 16 | """ 17 | self.whitelist = whitelist 18 | self.blacklist = blacklist 19 | self.entry_point = entry_point 20 | 21 | self.visited_pages = set() 22 | self.to_visit = deque() 23 | 24 | if client is None: 25 | self.client = Client() 26 | else: 27 | self.client = client 28 | 29 | def __iter__(self): 30 | self.to_visit.append(self.entry_point) 31 | 32 | while self.to_visit: 33 | url = self.to_visit.pop() 34 | 35 | if not get_url_host(url) in self.whitelist: 36 | continue 37 | 38 | if any(search(x, url) for x in self.blacklist): 39 | continue 40 | 41 | url_without_hashbang, _, _ = url.partition("#") 42 | if url_without_hashbang in self.visited_pages: 43 | continue 44 | 45 | self.visited_pages.add(url_without_hashbang) 46 | try: 47 | page = self.client.download_page(url) 48 | except NotAPage: 49 | continue 50 | 51 | yield page 52 | self.to_visit.extend(page.get_links()) 53 | -------------------------------------------------------------------------------- /webvulnscan/form.py: -------------------------------------------------------------------------------- 1 | from .compat import urlencode, urljoin 2 | from .utils import add_get_params 3 | 4 | from .form_input import FormInput 5 | from .textarea import TextArea 6 | 7 | 8 | class Form(object): 9 | def __init__(self, url, document): 10 | self.document = document 11 | self.action = urljoin(url, document.attrib.get('action')) 12 | self.parameters = {} 13 | 14 | @property 15 | def method(self): 16 | return self.document.attrib.get('method', 'get').lower() 17 | 18 | @property 19 | def is_search_form(self): 20 | role = 
class FormInput(object):
    """ Wrapper around an <input> element that exposes its attributes and
    guesses a plausible value to submit for it. """

    def __init__(self, element):
        self.element = element
        # Default value to submit for each input type.
        self.type_dictionary = {"text": "abcdefgh",
                                "email": "ex@amp.le",
                                "password": "abcd1234",
                                "checkbox": "true",
                                "radio": "1",
                                "datetime": "1990-12-31T23:59:60Z",
                                "datetime-local":
                                "1985-04-12T23:20:50.52",
                                "date": "1996-12-19",
                                "month": "1996-12",
                                "time": "13:37:00",
                                "week": "1996-W16",
                                "number": "123456",
                                "range": "1.23",
                                "url": "http://localhost/",
                                "search": "query",
                                "tel": "012345678",
                                "color": "#FFFFFF",
                                "hidden": "Secret.",
                                "submit": ""}

    def _get_attrib_value(self, name):
        """ Return the attribute name of the element, "" if it is unset. """
        return self.element.attrib.get(name, "")

    def _get_int_attrib(self, name):
        """ Return the attribute name as int, 0 if unset or not a number. """
        try:
            return int(self._get_attrib_value(name))
        except ValueError:
            return 0

    @property
    def get_type(self):
        return self._get_attrib_value('type').lower()

    @property
    def get_name(self):
        return self._get_attrib_value('name')

    @property
    def get_element_value(self):
        return self._get_attrib_value('value')

    @property
    def minlength(self):
        return self._get_int_attrib('minlength')

    @property
    def maxlength(self):
        return self._get_int_attrib('maxlength')

    def guess_value(self):
        """ Return a value to submit for this input.

        The value attribute of the element wins over the default for the
        input type. For text inputs the result is cut down to maxlength
        and, if too short, repeated until it is minlength characters long
        (never beyond a positive maxlength).
        """
        default_value = self.type_dictionary.get(self.get_type, '')
        next_value = self.get_element_value or default_value

        if self.get_type != "text":
            return next_value

        maxlength = self.maxlength
        minlength = self.minlength

        # Truncate the chosen value, not the type default.
        if maxlength > 0 and len(next_value) > maxlength:
            next_value = next_value[:maxlength]

        if maxlength > 0:
            minlength = min(minlength, maxlength)

        if next_value and len(next_value) < minlength:
            # Ceiling division: number of copies needed to reach minlength.
            copies = -(-minlength // len(next_value))
            next_value = (next_value * copies)[:minlength]

        return next_value
def close(self):
    """ Finish parsing and return the root element of the built tree.

    Every tag that is still open is reported as an HTML error and
    closed. If the tree builder cannot deliver a document, a warning is
    logged and a bare <html> element is returned instead.
    """
    # Close all outstanding tags, innermost first: the tree builder
    # expects end tags in reverse order of the start tags.
    while self.tag_stack:
        tag = self.tag_stack.pop()
        self._log('warn', self.url, u'HTML Error', u'Unclosed <%s>' % tag)
        self.tb.end(tag)

    HTMLParser.close(self)
    try:
        res = self.tb.close()
        assert res is not None, u'Document should not be empty'
        return res
    except AssertionError as error:
        self._log('warn', self.url, u'HTML Error', error.args[0])
        # Return a minimal tree
        return xml.etree.ElementTree.Element('html')
class Log(object):
    """ Collects log entries and prints them, directly or as a report.

    abort        - raise AbortProcessing as soon as an entry is recorded.
    verbosity    - lowest level (one of LEVELS) that is recorded.
    direct_print - print every recorded entry immediately.
    """

    def __init__(self, abort=False, verbosity=u'warn', direct_print=False):
        self.abort = abort
        self.entries = []
        self.verbosity = verbosity
        self.direct_print = direct_print

    def __call__(self, level, target, group, message=u'', request=None):
        """ Record an entry unless its level is below the verbosity.

        Raises AbortProcessing after recording if abort is set.
        """
        assert level in LEVELS
        if LEVELS.index(level) < LEVELS.index(self.verbosity):
            return  # Ignore this log entry

        entry = LogEntry(level, target, group, message, request)
        self.entries.append(entry)
        if self.abort:
            raise AbortProcessing()

        if self.direct_print:
            print(entry_str(entry))

    def print_report(self, summarize=True):
        """ Print all recorded entries grouped by level, group and message.

        With summarize set, a group of more than three entries is reduced
        to its first entry plus the number of similar ones.
        """
        def sort_key(entry):
            # Leave out the request: request objects need not be
            # orderable, which breaks plain tuple comparison on ties.
            return (entry.level, entry.target, entry.group, entry.message)

        summary = collections.defaultdict(set)
        for e in self.entries:
            summary[(e.level, e.group, e.message)].add(e)

        for _, sum_entries in sorted(summary.items(),
                                     key=lambda item: item[0]):
            ordered = sorted(sum_entries, key=sort_key)
            if summarize and len(ordered) > 3:
                print(entry_str(ordered[0]) +
                      ' (and %d similar)' % (len(ordered) - 1))
            else:
                for e in ordered:
                    print(entry_str(e))
def parse_options():
    """ Build the option parser and merge defaults, config file and CLI.

    Precedence (lowest to highest): parser defaults, values read from the
    file given via --config, options given on the command line.  Targets
    from the configuration file and from the command line are concatenated.

    Returns a tuple (options, arguments).  Exits through parser.error() if
    no target is given and --write-config is not used.
    """
    parser = OptionParser(usage='usage: %prog [options] url...')

    default_options = OptionGroup(parser, "Default", "")
    default_options.add_option('--verbose', '-v', default=None, dest="verbose",
                               action="store_true",
                               help="Print the current targets, etc.")
    default_options.add_option('--dont-filter', default=True, dest="do_print",
                               action="store_false",
                               help="Write output directly to the command"
                               "line, don't filter it.")
    default_options.add_option('--vulnerabilities-only', default=False,
                               dest="vuln_only", action="store_true",
                               help="Print only vulnerabilities "
                               "(i.e. no warnings)")
    default_options.add_option('--abort-early', '-a', default=False,
                               dest="abort_early", action="store_true",
                               help="Exit on first found vulnerability.")
    default_options.add_option('--import-cookies', default=None,
                               dest="import_cookies",
                               help="Given a file, it will import it. "
                               "(Hint: Useful to avoid Captchas...)")
    parser.add_option_group(default_options)

    crawling_options = OptionGroup(parser, "Crawling",
                                   "This section provides information "
                                   "about the different crawling options.")
    crawling_options.add_option('--no-crawl', action='store_true',
                                dest='no_crawl',
                                help="DO NOT search for links on the target")
    # NOTE(review): unlike --blacklist this option has no action="append",
    # so a single string replaces the [] default -- confirm whether several
    # hosts are meant to be accepted here.
    crawling_options.add_option('--whitelist', default=[],
                                dest="whitelist",
                                help="Hosts which are allowed to be crawled.")
    crawling_options.add_option('--blacklist', default=[], dest="blacklist",
                                action="append",
                                help="Specify sites which shouldn't be "
                                "visited or attacked. (Hint: logout)")

    parser.add_option_group(crawling_options)
    authentification_options = OptionGroup(parser, "Authentification",
                                           "Authentification to a specific"
                                           " post site.")
    authentification_options.add_option('--auth', default=None,
                                        dest="auth_url",
                                        help="Post target for "
                                        "authentification")
    authentification_options.add_option('--auth-data', dest='auth_data',
                                        action='append', type='str',
                                        default=[],
                                        help="A post parameter in the "
                                        "form of targetname=targetvalue")
    authentification_options.add_option('--form-page', dest='form_page',
                                        default=None,
                                        help="The site of the form you want "
                                        "to use to sign in")
    authentification_options.add_option('--form-id', dest='form_id',
                                        default=None,
                                        help="The id of the form you want "
                                        "to use to sign in.")
    authentification_options.add_option('--form-data', dest='form_data',
                                        action='append', type='str',
                                        default=[],
                                        help="A field you want to set "
                                        "manually.")
    parser.add_option_group(authentification_options)

    configuration_options = OptionGroup(parser, "Configuration",
                                        "You are also able to write your"
                                        " specified parameters in a file"
                                        " for easier usage.")
    configuration_options.add_option('--config', '-c', metavar='FILE',
                                     dest="read_config",
                                     help="Read the parameters from FILE")
    configuration_options.add_option('--write-config', metavar='FILE',
                                     dest="write_config",
                                     help="Instead of running the options,"
                                     ' write them to the specified file ("-" '
                                     'for standard output).')
    parser.add_option_group(configuration_options)

    # filter_options = OptionGroup(parser, "Filter",
    #                              "Functions which may"
    #                              "enhance user experience")
    # filter_options.add_option("--no-heuristics",
    #                           dest="no_heuristics"
    #                           help="Do not filter results")
    # parser.add_option_group(filter_options)

    # Options for scanning for specific vulnerabilities.
    attack_options = OptionGroup(parser, "Attacks",
                                 "If you specify one or several of the "
                                 "options _only_ this/these will be run. "
                                 "If you don't specify any, all will be "
                                 "run.")
    for attack in all_attacks():
        attack_options.add_option('--' + attack.__name__, dest=attack.__name__,
                                  action="store_true", default=False)
        attack_options.add_option('--except-' + attack.__name__,
                                  dest=attack.__name__ + "_except",
                                  action="store_true", default=False)
    parser.add_option_group(attack_options)

    # Get default values (parsing an empty argument list yields defaults only)
    options, arguments = parser.parse_args([])

    # Parse command line into an empty Values object, so that it contains
    # only what was explicitly given and does not shadow the config file
    # with parser defaults.
    cli_options = Values()
    _, cli_arguments = parser.parse_args(values=cli_options)

    # Update default values with configuration file
    config_fn = cli_options.__dict__.get('read_config')
    if config_fn is not None:
        read_options, read_arguments = read_config(config_fn, parser)
        options.__dict__.update(read_options)
        arguments += read_arguments

    # Update actual CLI options
    options.__dict__.update(cli_options.__dict__)
    arguments += cli_arguments

    if not arguments and not options.write_config:
        parser.error(u'Need at least one target')

    return (options, arguments)
class Page(object):
    """ A fetched page: its request, the raw response and the parsed document.

    request     -- object with a ``url`` attribute the page was fetched with
    html        -- response body handed to parse_html()
    headers     -- response headers, stored as given
    status_code -- response status, stored as given
    document    -- result of parse_html(html, request.url, log)
    """

    def __init__(self, log, request, html, headers, status_code):
        assert hasattr(request, 'url')
        self.request = request
        self.html = html
        self.headers = headers
        self.status_code = status_code
        self.document = parse_html(html, request.url, log)

    @property
    def url(self):
        """ URL of the request this page belongs to. """
        return self.request.url

    @property
    def url_parameters(self):
        """ List of (name, value) pairs of everything after the first "?". """
        _, _, query = self.url.partition("?")
        return parse_qsl(query)

    # An immutable empty tuple replaces the former ``[]`` default, so that no
    # mutable object is shared between calls.
    def get_forms(self, blacklist=()):
        """ Generator for all forms on the page.

        Forms whose action matches any regular expression in ``blacklist``
        are skipped.
        """
        for form in self.document.findall('.//form'):
            generated = Form(self.url, form)

            if any(search(x, generated.action) for x in blacklist):
                continue

            yield generated

    def get_links(self, blacklist=()):
        """ Generator for all links on the page.

        Yields the href of every <a href=...> element, resolved against the
        page URL; URLs matching any regular expression in ``blacklist`` are
        skipped.
        """
        for link in self.document.findall('.//a[@href]'):
            href = link.attrib.get('href')
            url = urljoin(self.url, href)
            if any(search(x, url) for x in blacklist):
                continue
            yield url
class Request(compat.Request):
    """ A compat.Request that remembers its (unencoded) POST parameters.

    ``parameters`` is None for a request without a body; otherwise it is the
    mapping that was url-encoded into the request data.
    """

    def __init__(self, url, parameters=None, headers=None):
        self.parameters = parameters

        data = None
        if parameters is not None:
            if sys.version_info < (3, 0):
                # Python 2: encode keys and values before url-encoding them
                encoded_pairs = dict(
                    (key.encode('utf-8'), val.encode('utf-8'))
                    for key, val in parameters.items())
                data = urlencode(encoded_pairs)
            else:
                data = urlencode(parameters).encode('utf-8')
            assert isinstance(data, bytes)

        compat.Request.__init__(
            self, url, data, {} if headers is None else headers)

    def copy(self):
        """ Return a shallow copy of this request. """
        return copy.copy(self)

    @property
    def url(self):
        """ The full URL of the request. """
        return self.get_full_url()


DEFAULT_VALUE = ("Lorem ipsum dolor sit amet, consetetur sadipscing elitr,"
                 "sed diam nonumy eirmod tempor invidunt ut labore et "
                 "dolore magna aliquyam")


class TextArea(object):
    """ Wrapper around a textarea element (an object with ``attrib``). """

    def __init__(self, element):
        self.element = element

    def _get_attrib_value(self, name):
        """ Value of the attribute ``name``; "" if missing or empty. """
        return self.element.attrib.get(name) or ""

    @property
    def get_type(self):
        """ Always "textarea". """
        return "textarea"

    @property
    def get_name(self):
        """ The element's name attribute ("" if there is none). """
        return self._get_attrib_value('name')

    def guess_value(self):
        """ The placeholder attribute if there is one, else DEFAULT_VALUE. """
        return self._get_attrib_value("placeholder") or DEFAULT_VALUE
# Safe content types (will not be rendered as a webpage by the browser)
NOT_A_PAGE_CONTENT_TYPES = frozenset([
    'text/plain',
    'text/x-python',
    'image/gif',
    'image/jpeg',
    'image/png',
    'image/svg+xml',
])
HTML_CONTENT_TYPES = frozenset([
    "text/html",
    "application/xhtml+xml",
])


def parse_content_type(val, logfunc=None):
    """ Split a Content-Type header value into (content_type, charset).

    The content type is returned stripped and lower-cased.  For the types
    in NOT_A_PAGE_CONTENT_TYPES the charset is None.  A missing header is
    treated as ('text/html', 'utf-8'); a missing or empty charset parameter
    defaults to 'utf-8'.  If given, ``logfunc`` is called with a message
    (and, for an unexpected content type, the type itself).
    """
    if not val:
        if logfunc:
            logfunc(u'No Content-Type header, assuming text/html')
        return ('text/html', 'utf-8')

    content_type, _, params = val.partition(";")
    # Type and parameter names are matched case-insensitively and may be
    # surrounded by whitespace.
    content_type = content_type.strip().lower()

    if content_type in NOT_A_PAGE_CONTENT_TYPES:
        return (content_type, None)

    if content_type not in HTML_CONTENT_TYPES and logfunc:
        logfunc(u'Strange content type', content_type)

    charset = None
    # Look at every parameter, so that further parameters after (or before)
    # the charset do not end up inside the charset value.
    for param in params.split(';'):
        attrib_name, _, value = param.partition('=')
        if attrib_name.strip().lower() == "charset":
            charset = value.strip().strip('"\'')
            break

    if not charset:
        if logfunc:
            logfunc(u'No Charset set')
        charset = 'utf-8'

    return (content_type, charset)


def read_config(config_file, parser):
    """ Read a JSON configuration file written by write_config().

    Returns the tuple (options dict, arguments list).  ``parser`` is
    accepted for the callers' sake but not used.
    """
    with io.open(config_file, 'r', encoding='utf-8') as f:
        values = json.load(f)

    return values['options'], values['arguments']


def write_json(obj, filename, **kwargs):
    """ Dump ``obj`` as JSON to ``filename`` ("-" means standard output).

    Additional keyword arguments are passed on to json.dump().
    """
    if filename == u'-':
        # Standard output is not ours to close; only write to it.
        json.dump(obj, sys.stdout, **kwargs)
        return

    if sys.version_info >= (3, 0):
        out = open(filename, 'w', encoding='utf-8')
    else:
        # In Python 2.x, json.dump expects a bytestream
        out = open(filename, 'wb')

    with out:
        json.dump(obj, out, **kwargs)


def write_config(filename, options, arguments):
    """ Write options and arguments in the format read_config() expects.

    The write_config and read_config options themselves are not stored;
    ``options`` is left unchanged.
    """
    options_dict = options.__dict__.copy()
    del options_dict['write_config']
    del options_dict['read_config']
    write_json({"options": options_dict, "arguments": arguments}, filename,
               indent=4)


def modify_parameter(parameters, target_name, value):
    """ Return a copy of ``parameters`` with ``target_name`` set to value. """
    res = parameters.copy()
    res[target_name] = value
    return res


def change_parameter(url, parameter, new_value):
    """ Returns a new url where the parameter is changed.

    A URL without query parameters is returned as it is; a parameter that
    does not occur in the query is not added.
    """
    url_query = urlparse(url).query
    query = dict(parse_qsl(url_query))

    if not query:
        return url

    if parameter in query:
        query[parameter] = new_value

    return urljoin(url, "?" + urlencode(query))


def get_url_host(url):
    """ Returns the network location (netloc) part of the url. """
    return urlparse(url).netloc


def get_page_text(page):
    """ Generator for the text of page.document and of all its elements. """
    if page.document.text:
        yield page.document.text

    for element in page.document.findall('.//*'):
        if element.text:
            yield element.text


def _search_whole_page(page):
    """ Default search of attack(): a single target, the page itself. """
    return [(page,)]


def attack(searchfunc=None):
    """ Decorator factory which turns an attack function into a class.

    The class is named like the decorated function and offers it as the
    static method ``attack``, next to ``searchfunc`` as ``search``.
    "Instantiating" the class with (client, log, page) runs the attack once
    for every tuple returned by search(page) and evaluates to None, since
    __new__ does not create an instance.
    """
    if searchfunc is None:
        searchfunc = _search_whole_page

    def run(cls, client, log, page):
        for s in cls.search(page):
            cls.attack(client, log, *s)

    def decorator(attackfunc):
        return type(attackfunc.__name__, (object,), {
            'attack': staticmethod(attackfunc),
            'search': staticmethod(searchfunc),
            '__new__': run,
        })
    return decorator


def could_be_secret(s):
    """ Truthy if s has 6+ characters, all of them hex digits, "$" or "!".

    The result is meant for truth testing (it is a match object, False or
    None), not for comparison with True.
    """
    return len(s) >= 6 and re.match(r'^[0-9a-fA-F$!]+$', s)


def get_param(url, pname):
    """ Return a GET parameter from a URL (u'' if it is not present). """
    return parse_qs(urlparse(url).query).get(pname, [u''])[0]


def add_get_params(url, params):
    """ Return url with params appended as url-encoded GET parameters.

    Non-ASCII characters in the values are dropped.  ``params`` itself is
    left untouched; the encoding works on a new dict.
    """
    assert isinstance(params, dict)

    encoded = dict(
        (key, value.encode('ascii', 'ignore'))
        for key, value in params.items())

    return (url +
            (u'&' if u'?' in url else '?') +
            urlencode(encoded))
def parse_http_headers(bs):
    """ Parse a raw HTTP header block (bytes) into an email message object.

    The bytes are decoded as UTF-8; if they are not valid UTF-8, they are
    decoded as ISO-8859-1 instead (which accepts every byte), so that
    headers in that historical HTTP encoding do not abort the parsing.
    Header values are available via res['Header-Name'] (None if missing).
    """
    assert isinstance(bs, bytes)
    try:
        s = bs.decode('utf-8')
    except UnicodeDecodeError:
        s = bs.decode('iso-8859-1')
    p = email.parser.Parser()
    res = p.parse(io.StringIO(s), headersonly=True)
    return res