├── .gitignore
├── .travis.yml
├── LICENSE
├── README.rst
├── test
│   ├── test_breach.py
│   ├── test_clickjack.py
│   ├── test_crawler.py
│   ├── test_crlf.py
│   ├── test_csrf.py
│   ├── test_exotic_characters.py
│   ├── test_form.py
│   ├── test_html_parser.py
│   ├── test_input.py
│   ├── test_page.py
│   ├── test_scan_cookies.py
│   ├── test_textarea.py
│   ├── test_utils.py
│   ├── test_xss.py
│   ├── tutil.py
│   └── web_runner.py
└── webvulnscan
    ├── __init__.py
    ├── __main__.py
    ├── attacks
    │   ├── __init__.py
    │   ├── breach.py
    │   ├── clickjack.py
    │   ├── cookiescan.py
    │   ├── crlf.py
    │   ├── csrf.py
    │   ├── exotic_characters.py
    │   └── xss.py
    ├── client.py
    ├── compat.py
    ├── crawler.py
    ├── form.py
    ├── form_input.py
    ├── html_parser.py
    ├── log.py
    ├── options.py
    ├── page.py
    ├── request.py
    ├── textarea.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | .cover_html
29 |
30 | # Translations
31 | *.mo
32 |
33 | # Mr Developer
34 | .mr.developer.cfg
35 | .project
36 | .pydevproject
37 |
38 | # Vim
39 | *.swp
40 | *.swo
41 | *.swl
42 | *.swm
43 | *.swn
44 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "2.7"
4 | - "3.3"
5 | install:
6 | - pip install pep8 --use-mirrors
7 | - pip install coverage --use-mirrors
8 | script:
9 | - pep8 webvulnscan test
10 | - nosetests test --verbose --with-coverage --cover-package=webvulnscan --cover-min-percentage=70
11 | notifications:
12 | email:
13 | - phihag@phihag.de
14 | - liebig.richard@hotmail.com
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2013 Richard Liebig
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ===========
2 | webvulnscan
3 | ===========
4 |
5 | .. image:: https://travis-ci.org/hhucn/webvulnscan.png?branch=master
6 | :target: https://travis-ci.org/hhucn/webvulnscan/builds
7 |
8 | Quickstart
9 | ----------
10 |
11 | .. code:: sh
12 |
13 | $ git clone https://github.com/hhucn/webvulnscan.git
14 | $ cd webvulnscan
15 | $ python -m webvulnscan http://example.target/
16 |
17 | What is it?
18 | -----------
19 | As the name suggests, webvulnscan is (or aims to become one day) a security scanner for web applications with a focus on automated testing, licensed under the MIT License. It is written in Python (compatible with 2.7 and 3.3) and does not require any external libraries.
20 |
21 | Features
22 | --------
23 | - Link & Form Crawling
24 | - Detection of XSS, CSRF, BREACH, Clickjacking and cacheable Cookies
25 | - White- and Blacklisting of Pages
26 | - Authentication
27 |
28 | Examples
29 | --------
30 |
31 | vulnsrv
32 | ~~~~~~~
33 |
34 | vulnsrv_ is a sample exploitable website for educational purposes. We will use it here as an example:
35 |
36 | .. _vulnsrv: https://github.com/phihag/vulnsrv
37 |
38 | .. code:: sh
39 |
40 | $ wget https://raw.github.com/phihag/vulnsrv/master/vulnsrv.py
41 | $ python vulnsrv.py
42 |
43 | It is now running at http://localhost:8666/ on your computer. Open a new console to run webvulnscan. Assuming that you are in your home directory and have already cloned webvulnscan...
44 |
45 | .. code:: sh
46 |
47 | $ cd webvulnscan
48 | $ python -m webvulnscan http://localhost:8666/
49 | Vulnerability: CSRF under http://localhost:8666/csrf/send
50 | Vulnerability: XSS on http://localhost:8666/xss/?username=Benutzer%21 in parameter username
51 |
52 | You may notice that these are not all of the vulnerabilities; webvulnscan is still a work in progress.
53 |
54 | Specific scanning
55 | ~~~~~~~~~~~~~~~~~
56 |
57 | If you want to scan only for specific vulnerabilities (for example, only for BREACH), simply try the following:
58 |
59 | .. code:: sh
60 |
61 | $ python -m webvulnscan --breach http://localhost:8666/
62 |
63 | or, if you want to scan for XSS and CSRF vulnerabilities:
64 |
65 | .. code:: sh
66 |
67 | $ python -m webvulnscan --xss --csrf http://localhost:8666/
68 |
69 | What if you want to be more specific and test only one page? Use --no-crawl:
70 |
71 | .. code:: sh
72 |
73 | $ python -m webvulnscan --no-crawl http://localhost:8666/
74 |
75 | Links will then be ignored. Forms, however, are not.
76 |
77 | White- and Blacklisting
78 | ~~~~~~~~~~~~~~~~~~~~~~~
79 |
80 | Sometimes a site you want to test links to other hosts. By default, the whitelist contains only the host of the given URL. Here is how you can add more:
81 |
82 | .. code:: sh
83 |
84 | $ python -m webvulnscan --whitelist http://ex.am.ple/ http://localhost/
85 |
86 | However, what if you use authentication and there is a /logout link? If the crawler hits it, the session is lost. Simply blacklist it!
87 |
88 | .. code:: sh
89 |
90 | $ python -m webvulnscan --blacklist logout http://localhost/
91 |
92 | The page will then never be visited. Please note that the blacklist parameter accepts regular expressions (Python flavor).
93 |
94 | Authentication
95 | ~~~~~~~~~~~~~~
96 |
97 | Suppose there is a login handler under /perform_login which expects the POST fields username and password; how can we log in? The account we want to use has the username "abc" and the password "123456". The command would look like the following:
98 |
99 | .. code:: sh
100 |
101 | $ python -m webvulnscan --auth http://no.tld/perform_login --auth-data username=abc --auth-data password=123456 http://no.tld/
102 |
103 | Yes, you have to pass the --auth-data option for every field you want to send.
104 |
105 | Configuration
106 | ~~~~~~~~~~~~~
107 |
108 | As you can see, you end up with a lot of parameters. To avoid typing so much, you can add the --write-out option and
109 |
110 | .. code:: sh
111 |
112 | $ python -m webvulnscan --write-out=example.conf http://localhost:8666/
113 |
114 | save them to a file. If you want to rerun the test because you (think you) fixed the issues, simply run:
115 |
116 | .. code:: sh
117 |
118 | $ python -m webvulnscan -c example.conf
119 |
--------------------------------------------------------------------------------
/test/test_breach.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import zlib
3 | import io
4 | import unittest
5 |
6 | import tutil
7 | import webvulnscan.attacks.breach
8 | from webvulnscan.page import Page
9 |
10 |
11 | try:
12 | from urllib.parse import unquote
13 | except ImportError:
14 | from urllib2 import unquote
15 |
16 |
17 | def _gzip_test_controller(html):
18 | def on_request(request):
19 | content = html.encode('utf-8')
20 | out_headers = {'Content-Type': 'text/html; charset=utf-8'}
21 |         if 'gzip' in request.headers.get('Accept-Encoding', 'identity'):
22 |             outs = io.BytesIO()
23 |             with gzip.GzipFile(fileobj=outs, mode='wb') as gf:
24 | gf.write(content)
25 | content = outs.getvalue()
26 | out_headers['Content-Encoding'] = 'gZiP'
27 | return (200, content, out_headers)
28 | return on_request
29 |
30 |
31 | def _deflate_test_controller(html):
32 | def on_request(request):
33 | content = html.encode('utf-8')
34 | out_headers = {'Content-Type': 'text/html; charset=utf-8'}
35 |         if 'deflate' in request.headers.get('Accept-Encoding', 'identity'):
36 | content = zlib.compress(content)
37 | out_headers['Content-Encoding'] = 'deflate'
38 | return (200, content, out_headers)
39 | return on_request
40 |
41 |
42 | def _breach_vulnerable():
43 | token = tutil.random_token(16)
44 | return {
45 |         '/': _gzip_test_controller(u'''
46 | <html><body>
47 | <form action="/post" method="post">
48 |     <input type="hidden" name="token" value="%s" />
49 |     <input type="submit" value="Send" />
50 | </form>
51 | </body></html>
52 | ''' % token),
55 | '/post': tutil.TokenController(token)
56 | }
57 |
58 |
59 | class BreachTest(unittest.TestCase):
60 | attack = webvulnscan.attacks.breach
61 |
62 | @tutil.webtest(False)
63 | def test_breach_static_site():
64 |         return {'/': u'<html></html>'}
65 |
66 | @tutil.webtest(False)
67 | def test_activated_gzip():
68 | return {
69 |             '/': _gzip_test_controller(u'<html></html>')
70 | }
71 |
72 | @tutil.webtest(False)
73 | def test_no_token():
74 |         return {'/': _gzip_test_controller(u'''
75 | <html><body>
76 | <form action="/search" method="get">
77 |     <input type="text" name="q" />
78 |     <input type="submit" value="Search" />
79 | </form>
80 | </body></html>
81 | '''),
85 | '/search': (
86 | 200,
87 |                 b'<html>Here are your results</html>',
88 | {'Content-Type': 'text/html; charset=utf-8'})}
89 |
90 | @tutil.webtest(True)
91 | def test_breach_vulnerable():
92 | return _breach_vulnerable()
93 |
94 | @unittest.skip('Not yet supported')
95 | def test_breach_vulnerable_urltoken():
96 | token = tutil.random_token(16)
 97 |         html = u'''
 98 | <html><body>
 99 | <form action="/post" method="get">
100 |     <input type="hidden" name="token" value="%s" />
101 |     <input type="submit" value="Send" />
102 | </form>
103 | </body></html>
104 | ''' % token
106 | client = tutil.TestClient({
107 | '/': _gzip_test_controller(html),
108 | '/post': tutil.TokenController(token, method='get')
109 | })
110 | client.log.assert_count(1)
111 |
112 | @tutil.webtest(False)
113 | def test_activated_deflate():
114 |         return {'/': _deflate_test_controller(u'<html></html>')}
115 |
116 | @tutil.webtest(False)
117 | def test_no_token_with_deflate():
118 |         html = u'''
119 | <html><body>
120 | <form action="/search" method="get">
121 |     <input type="text" name="q" />
122 |     <input type="submit" value="Search" />
123 | </form>
124 | </body></html>
125 | '''
129 | return {
130 | '/': _deflate_test_controller(html),
131 | '/search': (
132 | 200,
133 |                 b'<html>Here are your results</html>',
134 | {'Content-Type': 'text/html; charset=utf-8'})
135 | }
136 |
137 | @tutil.webtest(True)
138 | def test_breach_vulnerable_with_deflate():
139 | token = tutil.random_token(16)
140 |         html = u'''
141 | <html><body>
142 | <form action="/post" method="post">
143 |     <input type="hidden" name="token" value="%s" />
144 |     <input type="submit" value="Send" />
145 | </form>
146 | </body></html>
147 | ''' % token
150 | return {
151 | '/': _deflate_test_controller(html),
152 | '/post': tutil.TokenController(token),
153 | }
154 |
155 | @unittest.skip('Not yet supported')
156 | def test_breach_vulnerable_urltoken_with_deflate():
157 | token = tutil.random_token(16)
158 |         html = u'''
159 | <html><body>
160 | <form action="/post" method="get">
161 |     <input type="hidden" name="token" value="%s" />
162 |     <input type="submit" value="Send" />
163 | </form>
164 | </body></html>
165 | ''' % token
167 | client = tutil.TestClient({
168 | '/': _deflate_test_controller(html),
169 | '/post': tutil.TokenController(token, method='get')
170 | })
171 | client.log.assert_count(1)
172 |
173 | if __name__ == '__main__':
174 | unittest.main()
175 |
--------------------------------------------------------------------------------
/test/test_clickjack.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 |
4 | import tutil
5 | import webvulnscan
6 |
 7 | FORM_HTML = b'''<html>this is a form:
 8 | <form action="/delete" method="post">
 9 |     <input type="submit" value="Delete everything" />
10 | </form>
11 | </html>'''
12 |
13 |
14 | class ClickjackTest(unittest.TestCase):
15 | attack = webvulnscan.attacks.clickjack
16 |
17 | @tutil.webtest(False)
18 | def test_clickjack():
19 | return {
20 |             '/': u'''<html>
21 | <a href="/go">Links are (supposed to be) idempotent</a></html>
22 | ''',
23 |             '/go': u'''<html>Nothing here!</html>'''
24 | }
25 |
26 | @tutil.webtest(False)
27 | def test_clickjack_get_form():
28 | return {
29 |             '/': u'''<html>
30 | <form action="/" method="get">
31 |     <input type="text" name="q" />
32 |     <input type="submit" value="Search" />
33 | </form>
34 | </html>'''
35 | }
36 |
37 | @tutil.webtest(False)
38 | def test_clickjack_get_form_second():
39 | return {
40 |             '/': u'''<html>
41 | <form action="/">
42 |     <input type="text" name="q" />
43 |     <input type="submit" value="Search" />
44 | </form>
45 | </html>'''
46 | }
47 |
48 | @tutil.webtest(True)
49 | def test_clickjack_vulnerable_site():
50 | return {
51 | '/': (
52 | 200, FORM_HTML,
53 | {'Content-Type': 'text/html; charset=utf-8'}),
54 |             '/delete': u'''<html>Executed!</html>'''
55 | }
56 |
57 | @tutil.webtest(True)
58 | def test_clickjack_vulnerable_alternative_content_type():
59 | return {
60 | '/': (
61 | 200, FORM_HTML,
62 | {'Content-Type': 'application/xhtml+xml; charset=utf-8'}),
63 |             '/delete': u'''<html>Executed!</html>'''
64 |
65 | }
66 |
67 | @tutil.webtest(False)
68 | def test_clickjack_secured_site():
69 | return {
70 | '/': (
71 | 200, FORM_HTML,
72 | {'Content-Type': 'text/html; charset=utf-8',
73 | 'X-Frame-Options': 'DENY'}),
74 |             '/delete': u'''<html>Executed!</html>'''
75 | }
76 |
77 | @tutil.webtest(False)
78 | def test_clickjack_sameorigin_site():
79 | return {
80 | '/': (
81 | 200, FORM_HTML,
82 | {'Content-Type': 'text/html; charset=utf-8',
83 | 'X-Frame-Options': 'SAMEORIGIN'}),
84 |             '/delete': u'''<html>Executed!</html>'''
85 | }
86 |
87 | @tutil.webtest(False)
88 | def test_clickjack_allowfrom_site():
89 | return {
90 | '/': (
91 | 200, FORM_HTML,
92 | {'Content-Type': 'text/html; charset=utf-8',
93 | 'X-Frame-Options': 'ALLOW-FROM http://safe.example.org/'}),
94 |             '/delete': u'''<html>Executed!</html>'''
95 | }
96 |
97 | @tutil.webtest(True)
98 | def test_invalid_header():
99 | return {
100 | '/': (
101 | 200, FORM_HTML,
102 | {'Content-Type': 'text/html; charset=utf-8',
103 | 'X-Frame-Options': 'None please!'}),
104 |             '/delete': u'''<html>Executed!</html>'''
105 | }
106 |
--------------------------------------------------------------------------------
/test/test_crawler.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import tutil
4 | import webvulnscan.crawler
5 |
6 |
7 | class CrawlerTest(unittest.TestCase):
8 |
9 | def _assert_crawled(self, crawler, client, expected_raw):
10 | expected = set(map(client.full_url, expected_raw))
11 | matched = set(page.url for page in crawler)
12 | self.assertEqual(matched, set(expected))
13 |
14 | def test_imglink(self):
15 | client = tutil.TestClient({
16 |             u'/': (
17 |                 200,
18 |                 b'<html><a href="/b">another page</a></html>',
19 |                 {'Content-Type': 'text/html; charset=utf-8'}),
20 |             u'/b': (
21 |                 200,
22 |                 b'<html>image <img src="/img" /></html>',
23 |                 {'Content-Type': 'text/html; charset=utf-8'}),
24 |             u'/img': (
25 |                 200,
26 |                 b'[image]<a href="/b">resolve this</a>',
27 |                 {'Content-Type': 'image/png'}),
28 | })
29 | crawler = webvulnscan.crawler.Crawler(
30 | client.ROOT_URL, tutil.ContainsEverything(), client=client)
31 | self._assert_crawled(crawler, client, [u'/', u'/b'])
32 |
33 | def test_invalid_characters(self):
34 | client = tutil.TestClient({
35 | u'/': (
36 | 200,
37 | b'\xfc',
38 | {'Content-Type': 'text/html; charset=utf-8'}),
39 | })
40 | crawler = webvulnscan.crawler.Crawler(
41 | client.ROOT_URL, tutil.ContainsEverything(), client=client)
42 |
43 | list(crawler) # Crawl all pages - this should not throw an exception
44 | client.log.assert_found('0xfc')
45 | client.log.assert_count(1)
46 |
47 |
48 | if __name__ == '__main__':
49 | unittest.main()
50 |
--------------------------------------------------------------------------------
/test/test_crlf.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import cgi
3 | import tutil
4 | import unittest
5 |
6 | from webvulnscan.page import Page
7 | from webvulnscan.utils import get_param, parse_http_headers
8 | import webvulnscan.attacks.crlf
9 |
10 | try:
11 | from urllib.parse import unquote
12 | except ImportError:
13 | from urllib2 import unquote
14 |
15 |
16 | def header_site(getparam, vulnerable):
17 | if vulnerable:
18 | encode = lambda s: s.encode('utf-8')
19 | else:
20 | # There is no defined encoding in practice, so let's just base64
21 | # all user input
22 | encode = lambda s: base64.b64encode(s.encode('utf-8'))
23 |
24 | def site(req):
25 | p = getparam(req)
26 | if p is None:
27 | p = ""
28 |         html = (u'<html>%s</html>' % cgi.escape(p)).encode('utf-8')
29 | header_bytes = b'\r\n'.join([
30 | b'Content-Type: text/html; charset=utf-8',
31 | b'Set-Cookie: url=' + encode(p)
32 | ])
33 | headers = parse_http_headers(header_bytes)
34 | parsed_headers = {}
35 |         for key, value in headers.items():
36 |             parsed_headers[key] = value
37 | return (200, html, parsed_headers)
38 | return site
39 |
40 |
41 | class CRLFAttackerTest(unittest.TestCase):
42 | attack = webvulnscan.attacks.crlf
43 | argument = "?foo=bar"
44 |
45 | @tutil.webtest(False)
46 |     def test_crlf_static_site():
47 | return {
48 |             '/': lambda req: u'<html>%s</html>' % cgi.escape(req.url),
49 | }
50 |
51 | @tutil.webtest(True)
52 |     def test_crlf_vulnerable_url_site():
53 | return {
54 | '/': header_site(lambda req: get_param(req.url, 'foo'), True)
55 | }
56 |
57 | @tutil.webtest(False)
58 |     def test_crlf_secure_url_site():
59 | return {
60 | '/': header_site(lambda req: get_param(req.url, 'foo'), False)
61 | }
62 |
63 | @tutil.webtest(True)
64 |     def test_crlf_vulnerable_post_site():
65 | return {
66 |             '/': u'''<html><form action="/post" method="post">
67 |                 <input type="text" name="foo" /></form></html>''',
68 | '/post': header_site(lambda req: req.parameters.get('foo'), True)
69 | }
70 |
71 | @tutil.webtest(False)
72 |     def test_crlf_secure_post_site():
73 | return {
74 |             '/': u'''<html><form action="/post" method="post">
75 |                 <input type="text" name="foo" /></form></html>''',
76 | '/post': header_site(lambda req: req.parameters.get('foo'), False)
77 | }
78 |
79 | @tutil.webtest(True)
80 |     def test_crlf_vulnerable_get_site():
81 | return {
82 |             '/': u'''<html><form action="/post" method="get">
83 |                 <input type="text" name="foo" /></form></html>''',
84 | '/post': header_site(lambda req: get_param(req.url, 'foo'), True)
85 | }
86 |
87 | @tutil.webtest(False)
88 |     def test_crlf_secure_get_site():
89 | return {
90 |             '/': u'''<html><form action="/post" method="get">
91 |                 <input type="text" name="foo" /></form></html>''',
92 | '/post': header_site(lambda req: get_param(req.url, 'foo'), False)
93 | }
94 |
--------------------------------------------------------------------------------
/test/test_csrf.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 |
4 | import tutil
5 | import webvulnscan.attacks.csrf
6 | from webvulnscan.utils import get_param
7 |
8 |
9 | def csrf_page(test_token):
10 | def sitef(req):
11 | if test_token(req):
12 |             return u'<html>Thanks, posted</html>'
13 | else:
14 | return (
15 | 400,
16 |                 b'<html>CSRF denied</html>',
17 | {'Content-Type': 'text/html; charset=utf-8'})
18 | return sitef
19 |
20 | FORM_HTML = u'''<html>
21 | <form action="/s" method="get">
22 |     <input type="hidden" name="text" value="%s" />
23 |     <input type="submit" value="Send" />
24 | </form>
25 | </html>'''
26 |
27 |
28 | def test_csrf_protected_form():
29 | token = tutil.random_token(8)
30 | return {
31 | '/': FORM_HTML % token,
32 | '/s': csrf_page(lambda req: get_param(req.url, 'text'))
33 | }
34 |
35 |
36 | def test_csrf_vulnerable_form():
37 | token = tutil.random_token(8)
38 | return {
39 | '/': FORM_HTML % token,
40 | '/s': csrf_page(lambda req: True)
41 | }
42 |
43 |
44 | class CsrfTest(unittest.TestCase):
45 | attack = webvulnscan.attacks.csrf
46 |
47 | @tutil.webtest(False)
48 | def test_static_site():
49 | return {
50 |             '/': u'''<html></html>''',
51 | }
52 |
53 | @tutil.webtest(False)
54 | def test_csrf_protected_form():
55 | return test_csrf_protected_form()
56 |
57 | @tutil.webtest(True)
58 | def test_csrf_vulnerable_post_form():
59 | return test_csrf_vulnerable_form()
60 |
--------------------------------------------------------------------------------
/test/test_exotic_characters.py:
--------------------------------------------------------------------------------
1 | import tutil
2 | import unittest
3 | import sys
4 |
5 | import webvulnscan.attacks.exotic_characters
6 | from webvulnscan.utils import get_param
7 |
8 | try:
9 | from urllib.parse import unquote
10 | except ImportError:
11 | from urllib2 import unquote
12 |
13 |
14 | SHELL_CHARACTERS = u'"\'|;<>\0'
15 | GENERIC_FORM = u'''<html>
16 | <form action="/post" method="post">
17 |     <input type="text" name="test" />
18 | </form>
19 | </html>'''
20 |
21 |
22 | def shell_emulation(getinput):
23 | def site(req):
24 | s = getinput(req)
25 | # A real application would run subprocess.Popen(..., shell=True) or so
26 | if any(c in s for c in SHELL_CHARACTERS):
27 | return (
28 | 500,
29 |                 b'<html>Syntax Error</html>',
30 | {'Content-Type': 'text/html; charset=utf-8'}
31 | )
32 |         return u'<html>Process executed.</html>'
33 | return site
34 |
35 |
36 | class ExoticCharacterTest(unittest.TestCase):
37 | attack = webvulnscan.attacks.exotic_characters
38 | argument = '?test=a'
39 |
40 | @tutil.webtest(False)
41 | def test_exotic_characters_static_site():
42 | return {
43 |             '/': u'''<html></html>''',
44 | }
45 |
46 | @tutil.webtest(True)
47 | def test_exotic_characters_url_vulnerable_site():
48 | return {
49 | '/': shell_emulation(lambda req: get_param(req.url, 'test')),
50 | }
51 |
52 | @tutil.webtest(True)
53 | def test_exotic_characters_post_vulnerable_site():
54 | return {
55 | '/': GENERIC_FORM,
56 | '/post': shell_emulation(lambda req: req.parameters['test']),
57 | }
58 |
59 | @tutil.webtest(False)
60 | def test_exotic_characters_valid_parsing():
61 | return {
62 | '/': GENERIC_FORM,
63 |             '/post': u'<html>Properly escaped command</html>',
64 | }
65 |
--------------------------------------------------------------------------------
/test/test_form.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import xml.etree.ElementTree as ET
3 |
4 | import tutil
5 | import webvulnscan.form
6 |
7 |
8 | class FormTest(unittest.TestCase):
9 | def test_no_inputs_no_action(self):
10 |         doc = ET.fromstring('<form></form>')
11 | form = webvulnscan.form.Form('http://test/', doc)
12 | self.assertEqual({}, dict(form.get_inputs()))
13 | self.assertEqual("http://test/", form.action)
14 |
15 |     def test_no_inputs_with_action(self):
16 |         doc = ET.fromstring('<form action="test" method="get"></form>')
17 | form = webvulnscan.form.Form('http://test/', doc)
18 | self.assertEqual(doc.items(), form.document.items())
19 | self.assertEqual(doc.keys(), form.document.keys())
20 | self.assertEqual("http://test/test", form.action)
21 | self.assertEqual("get", form.method)
22 |
23 | def test_one_input_no_action(self):
24 |         doc = '<form><input type="text" name="test" /></form>'
25 | doc = ET.fromstring(doc)
26 | form = webvulnscan.form.Form('http://test/', doc)
27 | self.assertEqual({"test": "abcdefgh"},
28 | dict(form.get_parameters()))
29 | self.assertEqual("http://test/", form.action)
30 |
31 | def test_one_input_with_action(self):
32 |         doc = ('<form action="test">'
33 |                '<input type="text" name="test" /></form>')
34 | doc = ET.fromstring(doc)
35 | form = webvulnscan.form.Form('http://test/', doc)
36 | self.assertEqual({"test": "abcdefgh"},
37 | dict(form.get_parameters()))
38 | self.assertEqual("http://test/test", form.action)
39 |
40 | def test_serveral_inputs_no_action(self):
41 |         doc = ('<form><input type="text" name="test" />'
42 |                '<input type="submit" name="click" /></form>')
43 | doc = ET.fromstring(doc)
44 | form = webvulnscan.form.Form('http://test/', doc)
45 | self.assertEqual({"test": "abcdefgh", "click": ""},
46 | dict(form.get_parameters()))
47 | self.assertEqual("http://test/", form.action)
48 |
49 | def test_serveral_inputs_with_action(self):
50 |         doc = ('<form action="action"><input type="text" name="test" />'
51 |                '<input type="submit" name="click" /></form>')
52 | doc = ET.fromstring(doc)
53 | form = webvulnscan.form.Form('http://test/', doc)
54 | self.assertEqual({"test": "abcdefgh", "click": ""},
55 | dict(form.get_parameters()))
56 | self.assertEqual("http://test/action", form.action)
57 |
58 | def test_form_with_textarea(self):
59 |         doc = ('<form action="action">'
60 |                '<textarea name="test">random</textarea></form>')
61 | doc = ET.fromstring(doc)
62 | form = webvulnscan.form.Form('http://test/', doc)
63 | self.assertEqual({"test": "random"},
64 | dict(form.get_parameters()))
65 | self.assertEqual("http://test/action", form.action)
66 |
67 | def test_form_get_send(self):
68 | assert_function = self.assertEqual
69 |
70 | class StaticSite(object):
71 | def download_page(self, url, parameters=None,
72 | remember_visited=None):
73 | assert_function("random" in url, True)
74 |
75 |         doc = ('<form method="get">'
76 |                '<input type="text" name="test" value="random" /></form>')
77 | doc = ET.fromstring(doc)
78 | form = webvulnscan.form.Form('http://test/', doc)
79 | parameters = dict(form.get_parameters())
80 | form.send(StaticSite(), parameters)
81 |
82 | def test_form_post_send(self):
83 | assert_function = self.assertNotEqual
84 |
85 | class StaticSite(object):
86 | def download_page(self, url, parameters=None,
87 | remember_visited=None):
88 | assert_function(parameters, None)
89 |
90 |         doc = ('<form method="post">'
91 |                '<input type="text" name="test" value="random" /></form>')
92 | doc = ET.fromstring(doc)
93 | form = webvulnscan.form.Form('http://test/', doc)
94 | parameters = dict(form.get_parameters())
95 | form.send(StaticSite(), parameters)
96 |
97 | def test_search_form_class(self):
98 |         doc = '<form class="search"></form>'
99 | doc = ET.fromstring(doc)
100 | form = webvulnscan.form.Form('http://test/', doc)
101 | self.assertEqual(form.is_search_form, True)
102 |
103 | def test_search_form_role(self):
104 |         doc = '<form role="search"></form>'
105 | doc = ET.fromstring(doc)
106 | form = webvulnscan.form.Form('http://test/', doc)
107 | self.assertEqual(form.is_search_form, True)
108 |
--------------------------------------------------------------------------------
/test/test_html_parser.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from xml.etree.ElementTree import tostring
3 |
4 | import tutil
5 | from webvulnscan.html_parser import parse_html
6 |
7 |
8 | class HTMLParserTests(unittest.TestCase):
9 | def test_valid(self):
10 | log = tutil.TestLog()
11 |         html = u'<html>ü ä</html>'
12 | parser = parse_html(html, "http://example.site", log=log)
13 | log.assert_count(0)
14 |
15 | def test_forgot_close(self):
16 | log = tutil.TestLog()
17 |         html = u'<html><theforgottentag>foo</html>'
18 | parse_html(html, "http://example.site", log=log)
19 | log.assert_found(u'Unclosed')
20 | log.assert_found(u'theforgottentag')
21 | log.assert_count(1)
22 |
23 | def test_forgot_close_2(self):
24 | log = tutil.TestLog()
25 |         html = u'<html><theforgottentag><alsonot>foo</html>'
26 | parse_html(html, "http://example.site", log=log)
27 | log.assert_found(u'Unclosed')
28 | log.assert_found(u'theforgottentag')
29 | log.assert_found(u'alsonot')
30 | log.assert_count(2)
31 |
32 | def test_superflupus_close(self):
33 | log = tutil.TestLog()
34 |         html = u'<html>foo</b></html>'
35 |         parse_html(html, "http://example.site", log=log)
36 | log.assert_found(u'superfluous')
37 | log.assert_count(1)
38 |
39 | def test_close_after_root(self):
40 | log = tutil.TestLog()
41 |         html = u'<html>foo</html></html>'
42 | parse_html(html, "http://example.site", log=log)
43 | log.assert_found(u'superfluous')
44 | log.assert_found(u'after root')
45 | log.assert_count(1)
46 |
47 | def test_parse_empty(self):
48 | log = tutil.TestLog()
49 | html = u''
50 | doc = parse_html(html, "http://example.site", log=log)
51 | assert doc is not None
52 | log.assert_count(1)
53 |
54 | def test_parse_textroot(self):
55 | log = tutil.TestLog()
56 | html = u'someText'
57 | parse_html(html, "http://example.site", log=log)
58 | log.assert_found(u'someText')
59 | self.assertTrue(len(log.entries) >= 1)
60 |
61 | def test_parse_text_before_root(self):
62 | log = tutil.TestLog()
63 |         html = u'textBefore<html></html>'
64 | parse_html(html, "http://example.site", log=log)
65 | log.assert_found(u'Text')
66 | log.assert_found(u'textBefore')
67 | log.assert_count(1)
68 |
69 | def test_parse_text_after_root(self):
70 | log = tutil.TestLog()
71 |         html = u'<html></html>c_textAfter'
72 | parse_html(html, "http://example.site", log=log)
73 | log.assert_found(u'Text')
74 | log.assert_found(u'textAfter')
75 | log.assert_count(1)
76 |
77 | def test_parse_whitespace_before_root(self):
78 | log = tutil.TestLog()
79 |         html = u' <html></html>'
80 | parse_html(html, "http://example.site", log=log)
81 | log.assert_count(0)
82 |
83 | def test_parse_whitespace_after_root(self):
84 | log = tutil.TestLog()
85 |         html = u'<html></html> \n\r\t'
86 | parse_html(html, "http://example.site", log=log)
87 | log.assert_count(0)
88 |
89 | def test_fixup_forgotten_closing(self):
90 | log = tutil.TestLog()
91 |         html = u'<html><a>go</a>'
92 | doc = parse_html(html, "http://example.site", log=log)
93 |         self.assertEqual(tostring(doc), b'<html><a>go</a></html>')
94 | log.assert_found(u'html')
95 | log.assert_count(1)
96 |
97 | def test_empty_tags(self):
98 | log = tutil.TestLog()
99 |         html = u'<html><br /></html>'
100 | doc = parse_html(html, "http://example.site", log=log)
101 | log.assert_count(0)
102 |
103 | if __name__ == '__main__':
104 | unittest.main()
105 |
--------------------------------------------------------------------------------
/test/test_input.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import xml.etree.ElementTree as ET
3 |
4 | import tutil
5 | import webvulnscan.form_input
6 |
7 |
8 | class InputTest(unittest.TestCase):
9 | def test_input_disabled(self):
10 |         doc = ET.fromstring('<input type="text" name="test" disabled="disabled" />')
11 | form_input = webvulnscan.form_input.FormInput(doc)
12 | self.assertEqual(form_input.guess_value(), "")
13 |
14 | def test_input_no_meanings(self):
15 |         doc = ET.fromstring('<input name="test" />')
16 | form_input = webvulnscan.form_input.FormInput(doc)
17 | self.assertEqual(form_input.guess_value(), "")
18 |
19 | def test_input_text_no_value(self):
20 |         doc = ET.fromstring('<input type="text" name="test" />')
21 | form_input = webvulnscan.form_input.FormInput(doc)
22 | self.assertEqual(form_input.guess_value(), "abcdefgh")
23 |
24 | def test_input_text_with_value(self):
25 |         doc = ET.fromstring('<input type="text" name="test" value="hgfedcba" />')
26 | form_input = webvulnscan.form_input.FormInput(doc)
27 | self.assertEqual(form_input.guess_value(), "hgfedcba")
28 |
29 | def test_input_email_no_value(self):
30 |         doc = ET.fromstring('<input type="email" name="test" />')
31 | form_input = webvulnscan.form_input.FormInput(doc)
32 | self.assertEqual(form_input.guess_value(), "ex@amp.le")
33 |
34 | def test_input_email_with_value(self):
35 |         doc = ET.fromstring('<input type="email" name="test" '
36 |                             'value="ad@ministrat.or" />')
37 | form_input = webvulnscan.form_input.FormInput(doc)
38 | self.assertEqual(form_input.guess_value(), "ad@ministrat.or")
39 |
40 | def test_input_min_length(self):
41 |         doc = ET.fromstring('<input type="text" name="test" minlength="13" />')
42 | form_input = webvulnscan.form_input.FormInput(doc)
43 | self.assertEqual(len(form_input.guess_value()) > 12, True)
44 |
45 | def test_input_max_length(self):
46 |         doc = ET.fromstring('<input type="text" name="test" maxlength="5" />')
47 | form_input = webvulnscan.form_input.FormInput(doc)
48 | self.assertEqual(len(form_input.guess_value()), 5)
49 |
50 | def test_input_fixed_length(self):
51 |         doc = ET.fromstring('<input type="text" name="test" '
52 |                             'minlength="7" maxlength="7" />')
53 | form_input = webvulnscan.form_input.FormInput(doc)
54 | self.assertEqual(len(form_input.guess_value()), 7)
55 |
56 | def test_input_empty_value(self):
57 |         doc = ET.fromstring('<input type="text" name="test" value="" />')
58 | form_input = webvulnscan.form_input.FormInput(doc)
59 | self.assertEqual(form_input.get_element_value, '')
60 |
61 | def test_input_value(self):
62 |         doc = ET.fromstring('<input type="text" name="test" value="test" />')
63 | form_input = webvulnscan.form_input.FormInput(doc)
64 | self.assertEqual(form_input.get_element_value, 'test')
65 |
--------------------------------------------------------------------------------
/test/test_page.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import xml.etree.ElementTree as ET
3 |
4 | import tutil
5 | import webvulnscan.page
6 | import webvulnscan.request
7 |
8 |
9 | def FakePage(html, headers={}, status_code=200, url="http://test/"):
10 | log = tutil.TestLog()
11 | req = webvulnscan.request.Request(url)
12 | return webvulnscan.page.Page(log, req, html=html,
13 | headers=headers, status_code=status_code)
14 |
15 |
16 | class PageTest(unittest.TestCase):
17 | def test_generate_document(self):
18 |         doc = '<html><a href="/">link here</a></html>'
19 | parsed = ET.fromstring(doc)
20 | page = FakePage(doc)
21 | self.assertEqual(page.document.keys(), parsed.keys())
22 |         self.assertEqual(page.document.items(), parsed.items())
23 |
24 | def test_get_links_no_links(self):
25 |         doc = '<html>link</html>'
26 | page = FakePage(doc)
27 | output = set(page.get_links())
28 | self.assertEqual(output, set())
29 |
30 | def test_get_links_one_link(self):
31 |         doc = '<html><a href="/test">click</a></html>'
32 | page = FakePage(doc)
33 | output = set(page.get_links())
34 | self.assertEqual(output, {"http://test/test"})
35 |
36 | def test_get_links_several(self):
37 |         doc = '<html><a href="/1">1</a><a href="/2">2</a></html>'
38 | page = FakePage(doc)
39 | output = set(page.get_links())
40 | self.assertEqual(output, {"http://test/1", "http://test/2"})
41 |
42 | def test_get_url_parameters_none(self):
43 |         doc = u'<html></html>'
44 | page = FakePage(doc)
45 | output = dict(page.url_parameters)
46 | self.assertEqual(output, dict())
47 |
48 | def test_get_url_parameters_one(self):
49 |         doc = u'<html></html>'
50 | page = FakePage(doc, url=u'http://test/?test=1')
51 | output = dict(page.url_parameters)
52 | self.assertEqual(output, {'test': '1'})
53 |
54 | def test_get_url_parameters_several(self):
55 |         doc = u'<html></html>'
56 | page = FakePage(doc, url=u'http://test/?test=1&other=2')
57 | output = dict(page.url_parameters)
58 | self.assertEqual(output, {'test': '1', 'other': '2'})
59 |
60 | def test_get_forms(self):
61 | html = u" "
62 | page = FakePage(html)
63 | self.assertNotEqual(list(page.get_forms()), None)
64 |
65 | def test_get_forms_blacklisted(self):
66 | html = u" "
67 | blacklist = ["forbidden"]
68 | page = FakePage(html)
69 | self.assertEqual(list(page.get_forms(blacklist=blacklist)), [])
70 |
71 | if __name__ == '__main__':
72 | unittest.main()
73 |
--------------------------------------------------------------------------------
/test/test_scan_cookies.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 |
4 | import tutil
5 | import webvulnscan.attacks.cookiescan
6 |
7 |
 8 | # A cookie is implicitly cacheable if
 9 | # 1. a cookie is set,
10 | # 2. its expiry date is in the future, and
11 | # 3. no Cache-Control header is set.
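   | # Illustrative header combinations: "Set-Cookie: session=abc123" on its
   | # own is flagged, while the same response with "Cache-Control: private"
   | # (or "max-age=0") is not.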
12 |
13 |
14 | def make_urlmap(headers):
15 | headers['Content-Type'] = 'text/html; charset=utf-8'
16 | return {
17 |         '/': (200, b'<html></html>', headers),
18 | }
19 |
20 |
21 | class CookieScanTest(unittest.TestCase):
22 | attack = webvulnscan.attacks.cookiescan
23 |
24 | @tutil.webtest(False)
25 | def test_cookie_static_site():
26 | return make_urlmap({})
27 |
28 | @tutil.webtest(True)
29 | def test_cookie_insecure_site():
30 | return make_urlmap({
31 | "Set-Cookie": "random=test",
32 | })
33 |
34 | @tutil.webtest(False)
35 | def test_cookie_secure_site():
36 | return make_urlmap({
37 | "Set-Cookie": "random=test",
38 | "Cache-Control": "private",
39 | })
40 |
41 | @tutil.webtest(False)
42 | def test_cookie_secure_site_with_max_age():
43 | return make_urlmap({
44 | "Set-Cookie": "random=test",
45 | "Cache-Control": "max-age=0",
46 | })
47 |
--------------------------------------------------------------------------------
/test/test_textarea.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import xml.etree.ElementTree as ET
3 |
4 | import tutil
5 | import webvulnscan.textarea
6 |
7 |
8 | class TextArea(unittest.TestCase):
9 | def test_empty(self):
10 |         doc = ET.fromstring('<textarea name="area"></textarea>')
11 | textarea = webvulnscan.textarea.TextArea(doc)
12 | self.assertEqual(textarea.get_name, "area")
13 | self.assertEqual(textarea.get_type, "textarea")
14 |
15 | def test_placeholder(self):
16 |         doc = ET.fromstring('<textarea name="area" '
17 |                             'placeholder="somedata"></textarea>')
18 | textarea = webvulnscan.textarea.TextArea(doc)
19 | self.assertEqual(textarea.get_name, "area")
20 | self.assertEqual(textarea.guess_value(), "somedata")
21 | self.assertEqual(textarea.get_type, "textarea")
22 |
--------------------------------------------------------------------------------
/test/test_utils.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import tutil
4 | import xml.etree.ElementTree as ET
5 | from webvulnscan import utils
6 |
7 |
8 | class SimulatedPage(object):
9 | def __init__(self, document):
10 | self.document = document
11 |
12 |
13 | class UtilsTest(unittest.TestCase):
14 | def test_change_parameter_with_query(self):
15 | link = 'http://x.yz/?other=11&val=22&yet=3'
16 | generated = utils.change_parameter(link, "val", "42")
17 | self.assertTrue('val=22' not in generated)
18 | self.assertTrue('val=42' in generated)
19 | self.assertTrue('other=11' in generated)
20 | self.assertTrue('yet=3' in generated)
21 |
22 | def test_change_parameter_no_query(self):
23 | link = 'http://x.yz/'
24 | generated = utils.change_parameter(link, "val", "42")
25 | self.assertEqual(generated, link)
26 |
27 | def test_get_url_host(self):
28 | link = 'http://random.host/test/value'
29 | self.assertEqual(utils.get_url_host(link), "random.host")
30 |
31 | def test_get_page_text_no_text(self):
32 |         doc = ET.fromstring('<html><body /></html>')
33 | page = SimulatedPage(doc)
34 | self.assertEqual(list(utils.get_page_text(page)), [])
35 |
36 | def test_get_page_text_with_text(self):
37 |         doc = ET.fromstring('<html>text<p>subtext</p></html>')
38 | page = SimulatedPage(doc)
39 | self.assertEqual(list(utils.get_page_text(page)),
40 | ['text', 'subtext'])
41 |
42 | def test_modify_parameters(self):
43 | parameters = {'test': 'abc', 'test2': 'cba'}
44 | new_parameters = utils.modify_parameter(parameters,
45 | 'test', 'cba')
46 | parameter_list = list(new_parameters.values())
47 | self.assertEqual(parameter_list, ['cba', 'cba'])
48 |
--------------------------------------------------------------------------------
/test/test_xss.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import cgi
3 | import sys
4 |
5 | import tutil
6 | import webvulnscan.attacks.xss
7 |
8 | try:
9 | from urllib.parse import unquote
10 | except ImportError:
11 | from urllib2 import unquote
12 |
13 |
14 | def form_client(method, echo_param):
15 |     form = u'''<html><form action="/send" method="%s">
16 |     <input type="text" name="text" />
17 |     </form></html>''' % method
18 |
19 | def xss_site(req):
20 |         return u'<html>' + echo_param(req) + u'</html>'
21 |
22 | return {
23 | '/': form,
24 | '/send': xss_site,
25 | }
26 |
27 |
28 | class XssTest(unittest.TestCase):
29 | attack = webvulnscan.attacks.xss
30 | argument = '?test=foo'
31 |
32 | @tutil.webtest(False)
33 | def test_xss_static_site():
34 | return {
35 |             '/': u'''<html></html>''',
36 | }
37 |
38 | @tutil.webtest(True)
39 | def test_xss_post_vulnerable_site():
40 | return form_client('post',
41 | lambda req: req.parameters['text'])
42 |
43 | @tutil.webtest(False)
44 | def test_xss_post_secure_site():
45 | return form_client('post',
46 | lambda req: cgi.escape(req.parameters['text']))
47 |
48 | @tutil.webtest(True)
49 | def test_xss_url_vulnerable_site():
50 | return {
51 |             '/': lambda req: u'<html>' + unquote(req.url) + u'</html>',
52 | }
53 |
54 | @tutil.webtest(False)
55 | def test_xss_url_secure_site():
56 | return {
57 |             '/': lambda req: (u'<html>' +
58 |                               cgi.escape(unquote(req.url)) + u'</html>'),
59 | }
60 |
--------------------------------------------------------------------------------
/test/tutil.py:
--------------------------------------------------------------------------------
1 | """ Common test setup functions """
2 |
3 | import collections
4 | import logging
5 | import os.path
6 | import string
7 | import sys
8 | import random
9 |
10 |
11 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
12 | sys.path.append(root_dir)
13 |
14 | # If this fails, we failed to set up the correct path above
15 | import webvulnscan
16 |
17 |
18 | def random_token(length=8):
19 | return ''.join(random.choice(string.hexdigits) for _ in range(length))
20 |
21 |
22 | class TestLog(webvulnscan.log.Log):
23 | def assert_found(self, sub):
24 | assert any(sub in e.message for e in self.entries), (
25 | u'Expected to see "%s", but only got %r' % (
26 | (sub, [e.message for e in self.entries])))
27 |
28 | def assert_count(self, expected):
29 | assert len(self.entries) == expected, (
30 | u'Expected to see %d log entries, but got %d in log %r' %
31 | (expected, len(self.entries), list(self.entries)))
32 |
33 | def assert_vulnerable(self, vulnerable):
34 | was_vulnerable = len(self.entries) != 0
35 | assert was_vulnerable == vulnerable
36 |
37 |
38 | # A stub class for sites that are meant to be requested
39 | # by webvulnscan.Client()
40 | class ClientSite(object):
41 | def __init__(self):
42 | pass
43 |
44 | def download(self, url, parameters=None, remember_visited=None):
45 | pass
46 |
47 | def download_page(self, url, parameters=None, remember_visited=None):
48 | pass
49 |
50 |
51 | class TestClient(webvulnscan.client.Client):
52 | """ url_map is a dict whose keys are either URLs or query strings,
53 | , and whose values are one of:
54 | * tuples of (status_code, response_data, headers)
55 | * just a unicode string
56 | * a callable returning a tuple or unicode string
57 |
58 | For example, a valid url_map looks like:
59 | {
60 | u'http://localhost/': (200, b' ', {}),
61 | u'/404': (404, b'Not found', {'Content-Type': 'text/html;'}),
62 | u'/req': lambda request: u' ',
63 | }
64 | """
65 |
66 | EXAMPLE_PREFIX = u'http://test.webvulnscan'
67 |
68 | def __init__(self, url_map, *args, **kwargs):
69 | super(TestClient, self).__init__(*args, log=TestLog(), **kwargs)
70 | self.url_map = dict(
71 | (self.full_url(url), content)
72 | for url, content in url_map.items()
73 | )
74 |
75 | @property
76 | def ROOT_URL(self):
77 | return self.EXAMPLE_PREFIX + u'/'
78 |
79 | def full_url(self, url):
80 | return url if u'://' in url else self.EXAMPLE_PREFIX + url
81 |
82 | def _download(self, req):
83 | req_url = req.url.partition(u'?')[0]
84 | assert req_url in self.url_map, u'Invalid request to %r' % req_url
85 | res = self.url_map[req_url]
86 | if callable(res):
87 | headers = {}
88 | res = res(req)
89 | if isinstance(res, type(u'')):
90 | status_code = 200
91 | response_data = res.encode('utf-8')
92 | headers = {'Content-Type': 'text/html; charset=utf-8'}
93 | else:
94 | status_code, response_data, headers = res
95 |
96 | assert isinstance(response_data, bytes), (
97 | u'Got invalid test response body %r' % (response_data,))
98 | return (req, status_code, response_data, headers)
99 |
100 | def run_attack(self, attack, add_url=u''):
101 | root_page = self.download_page(self.ROOT_URL + add_url)
102 | return attack(self, self.log, root_page)
103 |
104 |
105 | def webtest(vulnerable):
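   |     """ Decorator for test methods: builds a TestClient from the url_map
   |     returned by the decorated function, runs self.attack against it and
   |     asserts whether the attack reported a vulnerability. """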
106 | def wrapper(func):
107 | client = TestClient(func())
108 | argument = ""
109 |
110 | def res_func(self):
111 | if hasattr(self, "argument"):
112 | client.run_attack(self.attack, self.argument)
113 | argument = self.argument
114 | else:
115 | client.run_attack(self.attack)
116 | client.log.assert_vulnerable(vulnerable)
117 |
118 | res_func.__name__ = func.__name__
119 | res_func.client = client
120 | res_func.argument = argument
121 |
122 | return res_func
123 |
124 | return wrapper
125 |
126 |
127 | class ContainsEverything(object):
128 | def __contains__(self, x):
129 | return True
130 |
131 |
132 | def TokenController(value, method='post', field_name='token'):
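   |     """ Simulates a token-protected endpoint: it answers 200 only when the
   |     expected token value is sent back in the given field. """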
133 | assert method in ('get', 'post')
134 |
135 | def on_request(request):
136 | parameters = request.parameters
137 | headers = request.headers
138 | url = request.url
139 | sent_value = parameters.get(field_name, u'')
140 | out_headers = {'Content-Type': 'text/html; charset=utf-8'}
141 | if value == sent_value:
142 |             content = b'<html>Done.</html>'
143 | return (200, content, out_headers)
144 | else:
145 |             content = b'<html>Wrong token.</html>'
146 | return (400, content, out_headers)
147 | return on_request
148 |
149 |
150 | __all__ = ('TestLog', 'TestClient', 'TokenController', 'ContainsEverything')
151 |
--------------------------------------------------------------------------------
/test/web_runner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from __future__ import unicode_literals
3 |
4 | import cgi
5 | import io
6 | import os
7 | import socket
8 | import unittest
9 | import sys
10 |
11 | try:
12 | from http.server import BaseHTTPRequestHandler, HTTPServer
13 | except ImportError:
14 | from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
15 |
16 | try:
17 | from urllib.parse import urlparse, parse_qs
18 | except ImportError:
19 | from urlparse import urlparse, parse_qs
20 |
21 | _WVS_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
22 |
23 | sys.path.append(_WVS_ROOT_DIR)
24 | import webvulnscan
25 |
26 | sitemap = {}
27 |
28 |
29 | class WebRunnerHandler(BaseHTTPRequestHandler):
30 | def _write(self, s):
31 | return self.wfile.write(s.encode('utf-8'))
32 |
33 | def _default_page(self):
34 | self.send_response(200)
35 | self.send_header("Content-Type", "text/html")
36 | self.end_headers()
37 | w = self._write
38 | w("""
39 |
40 |
41 |
42 | webvulnscan tests
43 |
44 |
45 | webvulnscan tests
46 |
47 |
57 |
58 | """)
59 |
60 | def _serve_request(self):
61 | parsed_path = urlparse(self.path)
62 | current_path = parsed_path.path.split('/')[1]
63 |
64 | if parsed_path.path == "/":
65 | self._default_page()
66 | elif current_path in sitemap:
67 | extended_path = "".join(parsed_path.path.split('/')[2:])
68 |
69 | site = sitemap[current_path]
70 | client = site.client
71 |
72 | if parsed_path.query == "":
73 | url = "http://test.webvulnscan/" + extended_path
74 | else:
75 | url = "http://test.webvulnscan/" + extended_path +\
76 | "?" + parsed_path.query
77 |
78 | request = webvulnscan.request.Request(url)
79 |
80 | if 'content-length' in self.headers:
81 | content_len = int(self.headers['content-length'])
82 | body = self.rfile.read(content_len)
83 | request.parameters = parse_qs(body)
84 |
85 | for value in request.parameters:
86 | new_value = request.parameters[value][0].decode('utf-8')
87 | request.parameters[value] = new_value
88 |
89 | _, status_code, response_data, headers = client._download(request)
90 | self.send_response(status_code)
91 | self.send_header('Content-Type', 'text/html')
92 |             for name, value in headers.items():
93 |                 self.send_header(name, value)
94 | self.end_headers()
95 |
96 | self.wfile.write(response_data)
97 | else:
98 | self.send_error(404, "File not Found!")
99 |
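   |     # Route any do_GET / do_POST / ... lookup to the generic handler.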
100 | def __getattr__(self, name):
101 | if name.startswith('do_'):
102 | return self._serve_request
103 | raise AttributeError()
104 |
105 |
106 | def discover():
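   |     # Walk all discovered test suites and yield the methods decorated with
   |     # tutil.webtest; they are recognizable by their 'client' attribute.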
107 | testloader = unittest.TestLoader()
108 | suites = testloader.discover(os.path.join(_WVS_ROOT_DIR, 'test'))
109 | for suite in suites:
110 | for klass in suite:
111 | for test in klass._tests:
112 | elements = dir(test)
113 | for subklass in elements:
114 | func = getattr(test, subklass)
115 | if hasattr(func, "client"):
116 | yield func
117 |
118 |
119 | def main():
120 | for test in discover():
121 | sitemap[test.__name__] = test
122 |
123 | httpd = HTTPServer(("", 8000), WebRunnerHandler)
124 | httpd.serve_forever()
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
--------------------------------------------------------------------------------
/webvulnscan/__init__.py:
--------------------------------------------------------------------------------
1 | import signal
2 | import sys
3 |
4 | from .attacks import all_attacks
5 | from .client import Client
6 | from .compat import MozillaCookieJar, urlparse
7 | from .crawler import Crawler
8 | from .log import Log
9 | from .options import parse_options
10 | from .utils import get_url_host
11 | from .utils import write_config
12 |
13 |
14 | def run(options, targets):
15 | options.whitelist = set(options.whitelist)
16 | options.blacklist = set(options.blacklist)
17 |
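   |     # Run only the attacks selected via options; if none were selected,
   |     # fall back to all attacks, then drop those excluded via "_except".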
18 | attacks = []
19 | for attack in all_attacks():
20 | if options.__dict__[attack.__name__]:
21 | attacks.append(attack)
22 |
23 | if not attacks:
24 | attacks = all_attacks()
25 |
26 |     for attack in list(attacks):
27 |         except_attack = options.__dict__[attack.__name__ + "_except"]
28 |
29 |         if not except_attack:
30 |             continue
31 |
32 |         attacks.remove(attack)
33 |
34 | if options.verbose:
35 | log = Log(verbosity='info', direct_print=True)
36 | elif options.vuln_only:
37 | log = Log(verbosity=u'vuln')
38 | else:
39 | log = Log()
40 | client = Client(log=log)
41 |
42 | if options.import_cookies:
43 | client.cookie_jar = MozillaCookieJar(options.import_cookies)
44 | client.cookie_jar.load()
45 |
46 | # TODO This is horrible. Remove it!
47 | if options.auth_url is not None and options.auth_data is not None:
48 | post_data = {}
49 |
50 | for field in options.auth_data:
51 | name, _, value = field.partition('=')
52 | post_data.update({name: value})
53 |
54 | _, text, _ = client.download(options.auth_url, post_data)
55 | elif options.form_page and options.form_id:
56 | form_data = {}
57 |
58 | for field in options.form_data:
59 | name, _, value = field.partition('=')
60 | form_data.update({name: value})
61 |
62 | form_page = client.download_page(options.form_page)
63 | form = [x for x in form_page.get_forms()
64 | if x.document.attrib.get('id') == options.form_id][0]
65 |
66 | entries = dict(form.get_parameters())
67 |
68 | for option, value in form_data.items():
69 | entries[option] = value
70 |
71 | form.send(client, entries)
72 |
73 | try:
74 | for target in targets:
75 | if not urlparse(target).scheme:
76 | target = u'http://' + target
77 |
78 | options.whitelist.add(get_url_host(target))
79 |
80 | if options.no_crawl:
81 | all_pages = [client.download_page(target)]
82 | else:
83 | all_pages = Crawler(target, options.whitelist, client,
84 | options.blacklist)
85 |
86 | for page in all_pages:
87 | log('info', page.url, 'crawler', 'Scanning ...')
88 |
89 | for attack in attacks:
90 | attack(client, log, page)
91 |
92 | finally:
93 | if not options.verbose:
94 | log.print_report(summarize=options.do_print)
95 |
96 |
97 | def main():
98 | # Handle SIGPIPE (sent when someone is processing our output and is done)
99 | signal.signal(signal.SIGPIPE, signal.SIG_DFL)
100 |
101 | options, arguments = parse_options()
102 |
103 | if options.write_config:
104 | write_config(options.write_config, options, arguments)
105 | sys.exit(0)
106 |
107 | try:
108 | messages = run(options, arguments)
109 | except KeyboardInterrupt:
110 | sys.exit(130)
111 |     except SystemExit:
112 |         sys.exit(1)
115 |
116 | if messages:
117 | sys.exit(1)
118 |
--------------------------------------------------------------------------------
/webvulnscan/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Execute with
4 | # $ python webvulnscan/__main__.py (2.6+)
5 | # $ python -m webvulnscan (2.7+)
6 |
7 | import sys
8 |
9 | if __package__ is None and not hasattr(sys, "frozen"):
10 | # direct call of __main__.py
11 | import os.path
12 | path = os.path.realpath(os.path.abspath(__file__))
13 | sys.path.append(os.path.dirname(os.path.dirname(path)))
14 |
15 | import webvulnscan
16 |
17 | if __name__ == '__main__':
18 | webvulnscan.main()
19 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/__init__.py:
--------------------------------------------------------------------------------
1 | """ This modules provides various attacks and functions to run them. """
2 | from .xss import xss
3 | from .csrf import csrf
4 | from .crlf import crlf
5 | from .breach import breach
6 | from .clickjack import clickjack
7 | from .cookiescan import cookiescan
8 | from .exotic_characters import exotic_characters
9 |
10 |
11 | def all_attacks():
12 | return [xss, csrf, crlf, breach, clickjack, cookiescan, exotic_characters]
13 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/breach.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack, change_parameter, could_be_secret
2 |
3 |
4 | def check_for_compression(headers, field='Content-Encoding'):
5 | v = headers.get(field, 'identity').split(',')
6 |     gzip = 'gzip' in (e.strip().lower() for e in v)
7 |     deflate = 'deflate' in (e.strip().lower() for e in v)
8 | return gzip or deflate
9 |
10 |
11 | def find_secrets(form):
12 | return set(
13 | (form_input.get_name, form_input.get_element_value)
14 | for form_input in form.get_inputs()
15 | if (form_input.get_type == "hidden"
16 | and could_be_secret(form_input.get_element_value)))
17 |
18 |
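   | # BREACH needs two ingredients: the response must be served compressed and
   | # it must reflect a static secret (e.g. a CSRF token in a hidden field).
   | # Hidden values that stay constant across two downloads are reported.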
19 | @attack()
20 | def breach(client, log, target_page):
21 | if not check_for_compression(target_page.request.headers,
22 | 'Accept-Encoding'):
23 | # Redownload with request for gzip
24 | new_request = target_page.request.copy()
25 | new_request.headers['Accept-Encoding'] = "deflate, gzip"
26 |         target_page = client.download_page(new_request)
27 | if not check_for_compression(target_page.headers):
28 | return
29 |
30 | secrets = dict((form.action, find_secrets(form))
31 | for form in target_page.get_forms())
32 |
33 | page_redownload = client.download_page(target_page.request)
34 | for form in page_redownload.get_forms():
35 | redownload_secrets = find_secrets(form)
36 | previous_secrets = secrets[form.action]
37 | constant_secrets = previous_secrets.intersection(redownload_secrets)
38 | if constant_secrets:
39 | log('vuln', target_page.url, u'BREACH vulnerability',
40 | u'Secrets %r do not change during redownload'
41 | % dict(constant_secrets),
42 | request=target_page.request)
43 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/clickjack.py:
--------------------------------------------------------------------------------
1 | from ..compat import urlparse
2 | from ..utils import attack
3 |
4 |
5 | def check_for_post_forms(page):
6 | return any(form
7 | for form in page.get_forms()
8 | if form.method == 'post')
9 |
10 |
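   | # The only valid X-Frame-Options values are DENY, SAMEORIGIN and
   | # "ALLOW-FROM <origin>"; anything else leaves the page frameable.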
11 | def is_valid_header(frame_options):
12 | if frame_options == "DENY":
13 | return True
14 |
15 | if frame_options == "SAMEORIGIN":
16 | return True
17 |
18 | first_word, _, url = frame_options.partition(" ")
19 | if first_word == "ALLOW-FROM":
20 | netloc = urlparse(url).netloc
21 | if netloc:
22 | return True
23 |
24 | return False
25 |
26 |
27 | @attack()
28 | def clickjack(client, log, page):
29 |     if 'Content-Type' in page.headers:
30 |         content_type = page.headers['Content-Type']
31 |     else:
32 |         content_type = ""
33 |
34 | if not check_for_post_forms(page):
35 | return # No active content, so it's fine
36 |
37 | frame_options = page.headers.get('X-Frame-Options')
38 | if not frame_options:
39 | log('vuln', page.url, u'Clickjacking', u'no X-Frame-Options header')
40 | return
41 |
42 | if not is_valid_header(frame_options):
43 | log('vuln', page.url, u'Clickjacking', u'invalid X-Frame-Options!')
44 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/cookiescan.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack
2 |
3 |
4 | def check_for_cookies(headers):
5 | return "Set-Cookie" in headers or "Set-Cookies" in headers
6 |
7 |
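   | # A response that sets a cookie may end up in a shared cache unless
   | # Cache-Control forbids it (no-cache/private) or expires it (max-age=0).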
8 | def secure_cache_control(page):
9 | if "Cache-Control" in page.headers:
10 | cache_control = page.headers["Cache-Control"]
11 |
12 | if cache_control in "no-cache" or cache_control in "private":
13 | return True
14 |
15 | if "max-age" in cache_control and "0" in cache_control:
16 | return True
17 |
18 | return False
19 |
20 |
21 | @attack()
22 | def cookiescan(client, log, page):
23 | if not check_for_cookies(page.headers):
24 | return
25 |
26 | if not secure_cache_control(page):
27 | log('vuln', page.url, u"Implicit Cacheable Cookies")
28 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/crlf.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack, change_parameter, modify_parameter
2 |
3 | BODY = u'o'
4 | CRLF_SEQUENCE = (
5 |     u"Content-Type: text/html\r\n" +
6 |     u"Content-Length: %d\r\n\r\n" % len(BODY))
7 | ATTACK_SEQUENCE = CRLF_SEQUENCE + BODY
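   | # If the server echoes a parameter unencoded into a response header, the
   | # injected CRLFs end the header block early and the body shrinks to BODY;
   | # evaluate() detects this via the resulting Content-Length header.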
8 |
9 |
10 | def attack_form(client, log, form):
11 | parameters = dict(form.get_parameters())
12 | for parameter in parameters:
13 | attack_parameters = modify_parameter(parameters, parameter,
14 | ATTACK_SEQUENCE)
15 | result = form.send(client, attack_parameters)
16 | evaluate(log, form.action, result)
17 |
18 |
19 | def attack_url(client, log, url, parameter):
20 | attack_parameters = change_parameter(url, parameter, ATTACK_SEQUENCE)
21 | result = client.download_page(attack_parameters)
22 | evaluate(log, url, result)
23 |
24 |
25 | def evaluate(log, target, result):
26 | if result.headers.get('Content-Length') == str(len(BODY)):
27 | log('vuln', target, u'CRLF Injection', request=result.request)
28 | elif result.status_code == 500:
29 | log('warn', target, u'Parameter Parsing Error', request=result.request)
30 |
31 |
32 | def search(page):
33 | for form in page.get_forms():
34 | yield ('form', form)
35 |
36 | for parameter, _ in page.url_parameters:
37 | yield ('url', page.url, parameter)
38 |
39 |
40 | @attack(search)
41 | def crlf(client, log, target_type, *args):
42 | globals()['attack_' + target_type](client, log, *args)
43 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/csrf.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack
2 |
3 |
4 | def fill_entries(form, filter_type=None):
5 | for form_input in form.get_inputs():
6 | input_name = form_input.get_name
7 | input_value = form_input.guess_value()
8 | input_type = form_input.get_type
9 |
10 | if filter_type is None:
11 | yield input_name, input_value
12 | else:
13 | if input_type != filter_type:
14 | yield input_name, input_value
15 |
16 |
17 | def search(page):
18 | for form in page.get_forms():
19 | yield (form,)
20 |
21 |
22 | @attack(search)
23 | def csrf(client, log, form):
24 | # First, we send a valid request.
25 | valid_parameters = dict(fill_entries(form))
26 | form.send(client, valid_parameters)
27 |
28 | # Now, we suppress everything that looks like a token.
29 | broken_parameters = dict(fill_entries(form, "hidden"))
30 | response = form.send(client, broken_parameters)
31 |
32 | # Check if Request passed
33 | if response.status_code == 200 and not form.is_search_form:
34 | # Request passed, CSRF found...
35 | log('vuln', form.action, 'CSRF Vulnerability', message=u'',
36 | request=response)
37 |
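
A sketch of the token-suppression step, using a hypothetical inline form: fill_entries(form, "hidden") drops the hidden csrf_token field that the valid request carried, so a 200 response to the second request suggests the token is never checked:

    from webvulnscan.attacks.csrf import fill_entries
    from webvulnscan.form import Form
    from webvulnscan.html_parser import parse_html

    html = (u'<html><body><form action="/transfer" method="post">'
            u'<input type="text" name="amount"/>'
            u'<input type="hidden" name="csrf_token" value="deadbeef"/>'
            u'</form></body></html>')
    doc = parse_html(html, 'http://example.com/', lambda *args: None)
    form = Form('http://example.com/', doc.find('.//form'))

    print(dict(fill_entries(form)))            # includes csrf_token
    print(dict(fill_entries(form, 'hidden')))  # token suppressed
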
--------------------------------------------------------------------------------
/webvulnscan/attacks/exotic_characters.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack, change_parameter, modify_parameter
2 |
3 | SYMBOLS = {
4 | u'"', u"'", u'<', u'{', u'(', u')', u'}', u'>', u'&', u'|', u';',
5 |     u'\U0001D550', u'\x00', u'\uFFFF'}  # \U, not \u: astral-plane char
6 | DETECT_STRING = "test"
7 |
8 |
9 | def is_error_code(page):
10 | return page.status_code in [500, 503]
11 |
12 |
13 | def attack_form(client, log, form, name, symbol):
14 | guessed_parameters = dict(form.get_parameters())
15 | parameters = modify_parameter(guessed_parameters, name,
16 | symbol)
17 | try:
18 | page = form.send(client, parameters)
19 | except Exception as e:
20 | log('vuln', form.action, 'Possible incorrect Unicode Handling',
21 | repr(symbol))
22 | return
23 |
24 | if is_error_code(page):
25 | log('vuln', form.action, 'Incorrect Unicode Handling', repr(symbol))
26 |
27 |
28 | def attack_url(client, log, url, parameter):
29 | test_page = client.download_page(
30 | change_parameter(url, parameter, DETECT_STRING))
31 | if is_error_code(test_page):
32 | return
33 |
34 | for symbol in SYMBOLS:
35 | new_url = change_parameter(url, parameter, symbol.encode('utf-8'))
36 | attacked_page = client.download_page(new_url)
37 |
38 | if is_error_code(attacked_page):
39 | log('vuln', url, 'Incorrect Unicode handling in URL', repr(symbol))
40 |
41 |
42 | def search(page):
43 | for form in page.get_forms():
44 | for name, _ in form.get_parameters():
45 | for symbol in SYMBOLS:
46 | yield ('form', form, name, symbol)
47 |
48 | for parameter, _ in page.url_parameters:
49 |         yield ('url', page.url, parameter)
50 |
51 |
52 | @attack(search)
53 | def exotic_characters(client, log, target_type, *args):
54 | globals()['attack_' + target_type](client, log, *args)
55 |
--------------------------------------------------------------------------------
/webvulnscan/attacks/xss.py:
--------------------------------------------------------------------------------
1 | from ..utils import attack, change_parameter
2 |
3 | XSS_STRING = u'<script>alert(0)</script>'  # must not survive unescaped
4 |
5 |
6 | def attack_post(client, log, form):
7 |     # A helper function for modifying values of the parameter list.
8 | def modify_parameter(target_name, value):
9 | parameters = dict(form.get_parameters())
10 | parameters[target_name] = value
11 | return parameters
12 |
13 | for parameter_name, parameter_value in form.get_parameters():
14 | # Replace value with XSS_STRING
15 | parameters = modify_parameter(parameter_name, XSS_STRING)
16 |
17 | # Send the form
18 | try:
19 | attacked_page = form.send(client, parameters)
20 |         except Exception:
21 |             log('warn', form.action,
22 |                 'HTTP error occurred when confronted with HTML input',
23 |                 "in parameter " + parameter_name)
24 | return
25 |
26 | # Determine if the string is unfiltered on the page.
27 | if XSS_STRING in attacked_page.html:
28 | # Oh no! It is!
29 | log('vuln', attacked_page.url, "XSS",
30 | "in parameter " + parameter_name,
31 | request=attacked_page.request)
32 |
33 |
34 | def attack_get(client, log, url, parameter):
35 | # Replace the value of the parameter with XSS_STRING
36 | attack_url = change_parameter(url, parameter, XSS_STRING)
37 | # To run the attack, we just request the site.
38 | attacked_page = client.download_page(attack_url)
39 |     # If XSS_STRING is found unfiltered in the site, we have a problem.
40 | if XSS_STRING in attacked_page.html:
41 | log('vuln', attacked_page.url, "XSS", "in URL parameter " + parameter)
42 |
43 |
44 | def search(page):
45 | for form in page.get_forms():
46 | yield ('post', form)
47 |
48 | for parameter, _ in page.url_parameters:
49 | yield ('get', page.url, parameter)
50 |
51 |
52 | @attack(search)
53 | def xss(client, log, target_type, *args):
54 | globals()['attack_' + target_type](client, log, *args)
55 |
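
As an illustration, attack_get builds its probe URL with change_parameter, so the payload replaces the parameter's original value (the target URL here is a placeholder):

    from webvulnscan.attacks.xss import XSS_STRING
    from webvulnscan.utils import change_parameter

    # The payload is URL-encoded into the query string.
    print(change_parameter('http://example.com/?q=hello', 'q', XSS_STRING))
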
--------------------------------------------------------------------------------
/webvulnscan/client.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import functools
3 |
4 | from .compat import build_opener, HTTPCookieProcessor, URLError, \
5 |     CookieJar, HTTPError, BadStatusLine
6 | from .utils import parse_content_type, NOT_A_PAGE_CONTENT_TYPES
7 |
8 | import gzip
9 | import zlib
10 | import webvulnscan.log
11 | from .page import Page
12 | from .request import Request
13 |
14 |
15 | class NotAPage(Exception):
16 | """ The content at the URL in question is not a webpage, but something
17 | static (image, text, etc.) """
18 |
19 |
20 | class Client(object):
21 | """ Client provides a easy interface for accessing web content. """
22 |
23 | def __init__(self, log=webvulnscan.log):
24 | self.cookie_jar = CookieJar()
25 | self.opener = self.setup_opener()
26 | self.additional_headers = {}
27 | self.log = log
28 |
29 | def setup_opener(self):
30 | """ Builds the opener for the class. """
31 | cookie_handler = HTTPCookieProcessor(self.cookie_jar)
32 | opener = build_opener(cookie_handler)
33 |
34 | return opener
35 |
36 | def _download(self, request):
37 | self.log('info', request.url, "request", "Trying to request")
38 | try:
39 | response = self.opener.open(request)
40 | except HTTPError as error:
41 | response = error
42 | except URLError as error:
43 | if hasattr(self.log, 'warn'):
44 |                 self.log.warn(request.url, "unreachable")
45 | raise URLError(request.url + ' is unreachable: {0}'.format(error))
46 | except BadStatusLine as e:
47 | self.log('warn', request.url, 'Bad status line sent')
48 |             return (request, 0, b"", {})
49 |
50 | status_code = response.code
51 | headers = response.info()
52 |
53 | if headers.get('Content-Encoding') == "gzip":
54 | sim_file = gzip.GzipFile(fileobj=response)
55 | response_data = sim_file.read()
56 | elif headers.get('Content-Encoding') == "deflate":
57 | response_data = zlib.decompress(response.read())
58 | else:
59 | response_data = response.read()
60 |
61 | return (request, status_code, response_data, headers)
62 |
63 | def download(self, url_or_request, parameters=None, headers=None):
64 | """
65 | Downloads a URL, returns (request, status_code, response_data, headers)
66 | """
67 |
68 | if isinstance(url_or_request, Request):
69 | assert parameters is None
70 | assert headers is None
71 | request = url_or_request.copy()
72 | else:
73 | request = Request(url_or_request, parameters, headers)
74 |
75 | for header, value in self.additional_headers.items():
76 | request.add_header(header, value)
77 |
78 | msg = ('Requesting with parameters %s' % (request.parameters,)
79 | if request.parameters else
80 | 'Requesting')
81 | self.log('info', request.url, 'client status', msg)
82 |
83 | return self._download(request)
84 |
85 | def download_page(self, url_or_request, parameters=None, req_headers=None):
86 | """ Downloads the content of a site, returns it as page.
87 | Throws NotAPage if the content is not a webpage.
88 | """
89 |
90 | request, status_code, html_bytes, headers = self.download(
91 | url_or_request, parameters, req_headers)
92 |
93 | content_type, charset = parse_content_type(
94 | headers.get('Content-Type'),
95 | logfunc=functools.partial(self.log, 'warn', request.url))
96 |
97 | if content_type in NOT_A_PAGE_CONTENT_TYPES:
98 | raise NotAPage()
99 |
100 | try:
101 | html = html_bytes.decode(charset, 'strict')
102 | except UnicodeDecodeError as ude:
103 | self.log('warn', request.url, 'Incorrect encoding', str(ude))
104 | html = html_bytes.decode(charset, 'replace')
105 |
106 | return Page(self.log, request, html, headers, status_code)
107 |
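
A minimal usage sketch (placeholder URL, network access assumed). An explicit Log instance is passed because Client expects a callable logger:

    from webvulnscan.client import Client, NotAPage
    from webvulnscan.log import Log

    client = Client(log=Log(verbosity=u'info', direct_print=True))
    try:
        page = client.download_page('http://example.com/')
        print('%d, %d bytes of HTML' % (page.status_code, len(page.html)))
    except NotAPage:
        print('Target did not return an HTML page')
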
--------------------------------------------------------------------------------
/webvulnscan/compat.py:
--------------------------------------------------------------------------------
1 | try:
2 | from urllib.request import build_opener, Request, HTTPCookieProcessor, \
3 | URLError, HTTPError
4 | except ImportError:
5 | from urllib2 import build_opener, Request, HTTPCookieProcessor, \
6 | URLError, HTTPError
7 |
8 | try:
9 | from urllib.parse import urlencode, urljoin, parse_qsl, urlparse, \
10 |         quote_plus, parse_qs
11 | except ImportError:
12 | from urlparse import urljoin, parse_qsl, parse_qs, urlparse
13 | from urllib import urlencode, quote_plus
14 |
15 | try:
16 | from http.cookiejar import CookieJar, MozillaCookieJar
17 | except ImportError:
18 | from cookielib import CookieJar, MozillaCookieJar
19 |
20 | try:
21 | from html.parser import HTMLParser
22 | except ImportError: # Python < 3
23 | from HTMLParser import HTMLParser
24 |
25 | try:
26 | from http.client import BadStatusLine
27 | except ImportError:
28 | from httplib import BadStatusLine
29 |
--------------------------------------------------------------------------------
/webvulnscan/crawler.py:
--------------------------------------------------------------------------------
1 | from .client import Client, NotAPage
2 | from .utils import get_url_host
3 |
4 | from collections import deque
5 | from re import search
6 |
7 |
8 | class Crawler(object):
9 | """ Generator which systematically searches through a site. """
10 | def __init__(self, entry_point, whitelist, client=None, blacklist=set()):
11 | """
12 | Parameters:
13 | entry_point - where to start the search.
14 |         whitelist - which hosts are allowed to be crawled.
15 | client - A client object which can be used.
16 | """
17 | self.whitelist = whitelist
18 | self.blacklist = blacklist
19 | self.entry_point = entry_point
20 |
21 | self.visited_pages = set()
22 | self.to_visit = deque()
23 |
24 | if client is None:
25 | self.client = Client()
26 | else:
27 | self.client = client
28 |
29 | def __iter__(self):
30 | self.to_visit.append(self.entry_point)
31 |
32 | while self.to_visit:
33 | url = self.to_visit.pop()
34 |
35 |             if get_url_host(url) not in self.whitelist:
36 | continue
37 |
38 | if any(search(x, url) for x in self.blacklist):
39 | continue
40 |
41 | url_without_hashbang, _, _ = url.partition("#")
42 | if url_without_hashbang in self.visited_pages:
43 | continue
44 |
45 | self.visited_pages.add(url_without_hashbang)
46 | try:
47 | page = self.client.download_page(url)
48 | except NotAPage:
49 | continue
50 |
51 | yield page
52 | self.to_visit.extend(page.get_links())
53 |
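
Usage sketch (placeholder host, network access assumed): iterating a Crawler yields each reachable Page whose host is whitelisted, skipping URLs that match a blacklist pattern:

    from webvulnscan.client import Client
    from webvulnscan.crawler import Crawler
    from webvulnscan.log import Log

    crawler = Crawler('http://example.com/', whitelist={'example.com'},
                      client=Client(log=Log()), blacklist={r'/logout'})
    for page in crawler:
        print(page.url)
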
--------------------------------------------------------------------------------
/webvulnscan/form.py:
--------------------------------------------------------------------------------
1 | from .compat import urljoin
2 | from .utils import add_get_params
3 |
4 | from .form_input import FormInput
5 | from .textarea import TextArea
6 |
7 |
8 | class Form(object):
9 | def __init__(self, url, document):
10 | self.document = document
11 | self.action = urljoin(url, document.attrib.get('action'))
12 | self.parameters = {}
13 |
14 | @property
15 | def method(self):
16 | return self.document.attrib.get('method', 'get').lower()
17 |
18 | @property
19 | def is_search_form(self):
20 | role = self.document.attrib.get('role', '').lower()
21 | form_class = self.document.attrib.get('class', '').lower()
22 | return role == "search" or form_class == "search"
23 |
24 | def get_inputs(self):
25 | for input_element in self.get_input_elements():
26 | yield FormInput(input_element)
27 |
28 | for textarea in self.get_textarea_elements():
29 | yield TextArea(textarea)
30 |
31 | def get_parameters(self):
32 | for item in self.get_inputs():
33 | yield (item.get_name, item.guess_value())
34 |
35 | def get_input_elements(self):
36 | for form_input in self.document.findall('.//input'):
37 | yield form_input
38 |
39 | def get_textarea_elements(self):
40 | for textarea in self.document.findall('.//textarea'):
41 | yield textarea
42 |
43 | def send(self, client, parameters):
44 | if self.method == "get":
45 | url = add_get_params(self.action, parameters)
46 | return client.download_page(url)
47 | else:
48 | return client.download_page(self.action, parameters)
49 |
--------------------------------------------------------------------------------
/webvulnscan/form_input.py:
--------------------------------------------------------------------------------
1 | class FormInput(object):
2 | def __init__(self, element):
3 | self.element = element
4 | self.type_dictionary = {"text": "abcdefgh",
5 | "email": "ex@amp.le",
6 | "password": "abcd1234",
7 | "checkbox": "true",
8 | "radio": "1",
9 | "datetime": "1990-12-31T23:59:60Z",
10 | "datetime-local":
11 | "1985-04-12T23:20:50.52",
12 | "date": "1996-12-19",
13 | "month": "1996-12",
14 | "time": "13:37:00",
15 | "week": "1996-W16",
16 | "number": "123456",
17 | "range": "1.23",
18 | "url": "http://localhost/",
19 | "search": "query",
20 | "tel": "012345678",
21 | "color": "#FFFFFF",
22 | "hidden": "Secret.",
23 | "submit": ""}
24 |
25 | def _get_attrib_value(self, name):
26 | return self.element.attrib.get(name, "")
27 |
28 | @property
29 | def get_type(self):
30 | return self._get_attrib_value('type').lower()
31 |
32 | @property
33 | def get_name(self):
34 | return self._get_attrib_value('name')
35 |
36 | @property
37 | def get_element_value(self):
38 | return self._get_attrib_value('value')
39 |
40 | @property
41 | def minlength(self):
42 | try:
43 | return int(self._get_attrib_value('minlength'))
44 | except ValueError:
45 | return 0
46 |
47 | @property
48 | def maxlength(self):
49 | try:
50 | return int(self._get_attrib_value('maxlength'))
51 | except ValueError:
52 | return 0
53 |
54 | def guess_value(self):
55 | value = self.type_dictionary.get(self.get_type, '')
56 | supposed_value = self._get_attrib_value("value")
57 |
58 | if supposed_value:
59 | next_value = supposed_value
60 | else:
61 | next_value = value
62 |
63 | if self.get_type == "text":
64 |             if self.maxlength != 0 and len(next_value) > self.maxlength:
65 |                 next_value = next_value[:self.maxlength]
66 |
67 |             if self.minlength != 0 and len(next_value) < self.minlength:
68 |                 if len(next_value) != 0:
69 |                     # Pad with the first character until minlength is met
70 |                     required = self.minlength - len(next_value)
71 |                     next_value += next_value[0] * required
72 |
73 | return next_value
74 |
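
Two small examples of guess_value, with elements built by hand via ElementTree: the type dictionary supplies a plausible value, and an explicit value attribute takes precedence:

    import xml.etree.ElementTree as ET
    from webvulnscan.form_input import FormInput

    blank = ET.Element('input', {'type': 'email', 'name': 'mail'})
    print(FormInput(blank).guess_value())      # ex@amp.le

    prefilled = ET.Element('input', {'type': 'text', 'name': 'q',
                                     'value': 'hello'})
    print(FormInput(prefilled).guess_value())  # hello
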
--------------------------------------------------------------------------------
/webvulnscan/html_parser.py:
--------------------------------------------------------------------------------
1 | from .compat import HTMLParser
2 |
3 | import collections
4 | import xml.etree.ElementTree
5 |
6 |
7 | def parse_html(html, url, log):
8 | parser = EtreeParser(url, log)
9 | return xml.etree.ElementTree.fromstring(html, parser)
10 |
11 |
12 | class EtreeParser(HTMLParser):
13 | def __init__(self, url, log):
14 | # We need this ancient super form because HTMLParser is a
15 | # classic class in 2.x
16 | HTMLParser.__init__(self)
17 | self.tb = xml.etree.ElementTree.TreeBuilder()
18 | self.tag_stack = collections.deque()
19 | self.url = url
20 | self._log = log
21 |
22 | def handle_starttag(self, tag, attrs):
23 | self.tag_stack.append(tag)
24 | self.tb.start(tag, dict(attrs))
25 |
26 | def handle_endtag(self, tag):
27 | try:
28 | expected = self.tag_stack.pop()
29 | except IndexError:
30 | self._log('warn', self.url, u'HTML Error',
31 | u'Tried to close tag <%s> after root element' % (tag,))
32 | return
33 |
34 | if expected != tag:
35 | if tag in self.tag_stack:
36 | # Someone forgot to close a tag
37 | while expected != tag:
38 | if expected not in ['meta', 'input', 'br', 'hr', 'img']:
39 | self._log('warn', self.url, u'HTML Error',
40 | u'Unclosed tag <%s>' % expected)
41 | self.tb.end(expected)
42 | expected = self.tag_stack.pop()
43 | else:
44 | # Random closing tag
45 | self._log('warn', self.url, u'HTML Error',
46 |                           u'Encountered </%s>, expected </%s>'
47 | % (tag, expected))
48 | # Re-add the expected element in order to suppress
49 | # further errors
50 | self.tag_stack.append(expected)
51 | return
52 |
53 | self.tb.end(tag)
54 |
55 | def handle_data(self, data):
56 | if not data.isspace() and not self.tag_stack:
57 | self._log('warn', self.url, u'HTML Error',
58 | u'Text "%r" outside of root element' % data)
59 | self.tb.data(data)
60 |
61 | def close(self):
62 | # Close all outstanding tags
63 | for tag in self.tag_stack:
64 | self._log('warn', self.url, u'HTML Error', u'Unclosed <%s>' % tag)
65 | self.tb.end(tag)
66 | self.tag_stack.clear()
67 |
68 | HTMLParser.close(self)
69 | try:
70 | res = self.tb.close()
71 | assert res is not None, u'Document should not be empty'
72 | return res
73 | except AssertionError as error:
74 | self._log('warn', self.url, u'HTML Error', error.args[0])
75 | # Return a minimal tree
76 | return xml.etree.ElementTree.Element('html')
77 |
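
parse_html is deliberately forgiving: it repairs unbalanced markup and reports each problem through the supplied log callable. A small sketch:

    from webvulnscan.html_parser import parse_html

    def log(level, url, group, message=u''):
        print(u'%s: %s (%s)' % (level, group, message))

    # The unclosed <b> is logged as an HTML Error and closed automatically.
    doc = parse_html(u'<html><body><b>bold</body></html>',
                     u'http://example.com/', log)
    print(doc.find('.//b').text)  # bold
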
--------------------------------------------------------------------------------
/webvulnscan/log.py:
--------------------------------------------------------------------------------
1 | import collections
2 |
3 | LogEntry = collections.namedtuple(
4 | 'LogEntry', ['level', 'target', 'group', 'message', 'request'])
5 |
6 | _LEVEL_I18N = {
7 | u'warn': u'Warning',
8 | u'vuln': u'Vulnerability',
9 | u'info': u'Info',
10 | }
11 | LEVELS = (u'info', u'warn', u'vuln')
12 |
13 |
14 | def entry_str(entry):
15 | if entry.request is None:
16 | return '%s: %s %s %s' % (
17 | _LEVEL_I18N[entry.level], entry.target, entry.group, entry.message)
18 | else:
19 | return '%s: %s %s %s | Request: %s' % (
20 | _LEVEL_I18N[entry.level], entry.target, entry.group, entry.message,
21 | entry.request.url)
22 |
23 |
24 | class AbortProcessing(Exception):
25 | """ Stop searching now. """
26 |
27 |
28 | class Log(object):
29 | def __init__(self, abort=False, verbosity=u'warn', direct_print=False):
30 | self.abort = abort
31 | self.entries = []
32 | self.verbosity = verbosity
33 | self.direct_print = direct_print
34 |
35 | def __call__(self, level, target, group, message=u'', request=None):
36 | assert level in LEVELS
37 | if LEVELS.index(level) < LEVELS.index(self.verbosity):
38 | return # Ignore this log entry
39 |
40 | entry = LogEntry(level, target, group, message, request)
41 | self.entries.append(entry)
42 | if self.abort:
43 | raise AbortProcessing()
44 |
45 | if self.direct_print:
46 | print(entry_str(entry))
47 |
48 | def print_report(self, summarize=True):
49 | summary = collections.defaultdict(set)
50 | for e in self.entries:
51 | summary[(e.level, e.group, e.message)].add(e)
52 |
53 | for k, sum_entries in sorted(summary.items()):
54 | level, group, message = k
55 | if summarize and len(sum_entries) > 3:
56 | print(entry_str(sorted(sum_entries)[0]) +
57 | ' (and %d similar)' % (len(sum_entries) - 1))
58 | else:
59 | for e in sorted(sum_entries):
60 | print(entry_str(e))
61 |
62 | __all__ = ['AbortProcessing', 'Log']
63 |
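
Usage sketch: a Log instance is called like a function; entries below the configured verbosity are dropped, and print_report() groups similar findings:

    from webvulnscan.log import Log

    log = Log(verbosity=u'warn')
    log('info', u'http://example.com/', u'client status')  # below verbosity
    log('vuln', u'http://example.com/?q=1', u'XSS', u'in parameter q')
    log.print_report()
    # Vulnerability: http://example.com/?q=1 XSS in parameter q
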
--------------------------------------------------------------------------------
/webvulnscan/options.py:
--------------------------------------------------------------------------------
1 | from optparse import OptionParser, OptionGroup, Values
2 |
3 | from .attacks import all_attacks
4 | from .utils import read_config
5 |
6 |
7 | def parse_options():
8 | parser = OptionParser(usage='usage: %prog [options] url...')
9 |
10 | default_options = OptionGroup(parser, "Default", "")
11 | default_options.add_option('--verbose', '-v', default=None, dest="verbose",
12 | action="store_true",
13 | help="Print the current targets, etc.")
14 | default_options.add_option('--dont-filter', default=True, dest="do_print",
15 | action="store_false",
16 |                                help="Write output directly to the command "
17 | "line, don't filter it.")
18 | default_options.add_option('--vulnerabilities-only', default=False,
19 | dest="vuln_only", action="store_true",
20 | help="Print only vulnerabilities "
21 | "(i.e. no warnings)")
22 | default_options.add_option('--abort-early', '-a', default=False,
23 | dest="abort_early", action="store_true",
24 | help="Exit on first found vulnerability.")
25 | default_options.add_option('--import-cookies', default=None,
26 | dest="import_cookies",
27 |                                help="Import cookies from the given file. "
28 | "(Hint: Useful to avoid Captchas...)")
29 | parser.add_option_group(default_options)
30 |
31 | crawling_options = OptionGroup(parser, "Crawling",
32 |                                    "This section provides information "
33 | "about the different crawling options.")
34 | crawling_options.add_option('--no-crawl', action='store_true',
35 | dest='no_crawl',
36 | help="DO NOT search for links on the target")
37 | crawling_options.add_option('--whitelist', default=[],
38 | dest="whitelist",
39 | help="Hosts which are allowed to be crawled.")
40 | crawling_options.add_option('--blacklist', default=[], dest="blacklist",
41 | action="append",
42 |                                 help="Specify sites which shouldn't be "
43 | "visited or attacked. (Hint: logout)")
44 |
45 | parser.add_option_group(crawling_options)
46 |     authentification_options = OptionGroup(parser, "Authentication",
47 |                                            "Authentication to a specific"
48 | " post site.")
49 | authentification_options.add_option('--auth', default=None,
50 | dest="auth_url",
51 | help="Post target for "
52 |                                         "authentication")
53 | authentification_options.add_option('--auth-data', dest='auth_data',
54 | action='append', type='str',
55 | default=[],
56 | help="A post parameter in the "
57 | "form of targetname=targetvalue")
58 | authentification_options.add_option('--form-page', dest='form_page',
59 | default=None,
60 | help="The site of the form you want "
61 | "to use to sign in")
62 | authentification_options.add_option('--form-id', dest='form_id',
63 | default=None,
64 | help="The id of the form you want "
65 | "to use to sign in.")
66 | authentification_options.add_option('--form-data', dest='form_data',
67 | action='append', type='str',
68 | default=[],
69 | help="A field you want to set "
70 | "manually.")
71 | parser.add_option_group(authentification_options)
72 |
73 | configuration_options = OptionGroup(parser, "Configuration",
74 | "You are also able to write your"
75 | " specified parameters in a file"
76 | " for easier usage.")
77 | configuration_options.add_option('--config', '-c', metavar='FILE',
78 | dest="read_config",
79 | help="Read the parameters from FILE")
80 | configuration_options.add_option('--write-config', metavar='FILE',
81 | dest="write_config",
82 |                                      help="Instead of running the options,"
83 | ' write them to the specified file ("-" '
84 | 'for standard output).')
85 | parser.add_option_group(configuration_options)
86 |
87 | # filter_options = OptionGroup(parser, "Filter",
88 | # "Functions which may"
89 | # "enhance user experience")
90 | # filter_options.add_option("--no-heuristics",
91 | # dest="no_heuristics"
92 | # help="Do not filter results")
93 | # parser.add_option_group(filter_options)
94 |
95 | # Options for scanning for specific vulnerabilities.
96 | attack_options = OptionGroup(parser, "Attacks",
97 |                                  "If you specify one or several of the "
98 |                                  "options, _only_ this/these will be run. "
99 | "If you don't specify any, all will be "
100 | "run.")
101 | for attack in all_attacks():
102 | attack_options.add_option('--' + attack.__name__, dest=attack.__name__,
103 | action="store_true", default=False)
104 | attack_options.add_option('--except-' + attack.__name__,
105 | dest=attack.__name__ + "_except",
106 | action="store_true", default=False)
107 | parser.add_option_group(attack_options)
108 |
109 | # Get default values
110 | options, arguments = parser.parse_args([])
111 |
112 | # Parse command line
113 | cli_options = Values()
114 | _, cli_arguments = parser.parse_args(values=cli_options)
115 |
116 | # Update default values with configuration file
117 | config_fn = cli_options.__dict__.get('read_config')
118 | if config_fn is not None:
119 | read_options, read_arguments = read_config(config_fn, parser)
120 | options.__dict__.update(read_options)
121 | arguments += read_arguments
122 |
123 | # Update actual CLI options
124 | options.__dict__.update(cli_options.__dict__)
125 | arguments += cli_arguments
126 |
127 | if not arguments and not options.write_config:
128 | parser.error(u'Need at least one target')
129 |
130 | return (options, arguments)
131 |
--------------------------------------------------------------------------------
/webvulnscan/page.py:
--------------------------------------------------------------------------------
1 | """ Page.py module implements a page """
2 | from .html_parser import parse_html
3 |
4 | from .compat import urljoin, parse_qsl
5 |
6 | from .form import Form
7 | from re import search
8 |
9 |
10 | class Page(object):
11 | def __init__(self, log, request, html, headers, status_code):
12 | assert hasattr(request, 'url')
13 | self.request = request
14 | self.html = html
15 | self.headers = headers
16 | self.status_code = status_code
17 | self.document = parse_html(html, request.url, log)
18 |
19 | @property
20 | def url(self):
21 | return self.request.url
22 |
23 | @property
24 | def url_parameters(self):
25 | _, _, url = self.url.partition("?")
26 | return parse_qsl(url)
27 |
28 | def get_forms(self, blacklist=[]):
29 | """ Generator for all forms on the page. """
30 | for form in self.document.findall('.//form'):
31 | generated = Form(self.url, form)
32 |
33 | if any(search(x, generated.action) for x in blacklist):
34 | continue
35 |
36 | yield generated
37 |
38 | def get_links(self, blacklist=[]):
39 | """ Generator for all links on the page. """
40 | for link in self.document.findall('.//a[@href]'):
41 | href = link.attrib.get('href')
42 | url = urljoin(self.url, href)
43 | if any(search(x, url) for x in blacklist):
44 | continue
45 | yield url
46 |
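
A small sketch tying Page to Request (placeholder URL, hand-written HTML); the log argument can be any callable with the Log signature:

    from webvulnscan.page import Page
    from webvulnscan.request import Request

    html = u'<html><body><a href="/next?x=1">next</a></body></html>'
    page = Page(lambda *args: None, Request(u'http://example.com/?q=1'),
                html, {}, 200)
    print(page.url_parameters)     # [('q', '1')]
    print(list(page.get_links()))  # ['http://example.com/next?x=1']
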
--------------------------------------------------------------------------------
/webvulnscan/request.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import sys
3 |
4 | from . import compat
5 | from .compat import urlencode
6 |
7 |
8 | class Request(compat.Request):
9 | def __init__(self, url, parameters=None, headers=None):
10 | self.parameters = parameters
11 | if parameters is None:
12 | data = None
13 | else:
14 | if sys.version_info >= (3, 0):
15 | data = urlencode(parameters).encode('utf-8')
16 | else:
17 | byte_parameters = dict(
18 | (k.encode('utf-8'), v.encode('utf-8'))
19 | for k, v in parameters.items())
20 | data = urlencode(byte_parameters)
21 | assert isinstance(data, bytes)
22 | if headers is None:
23 | headers = {}
24 | compat.Request.__init__(self, url, data, headers)
25 |
26 | def copy(self):
27 | return copy.copy(self)
28 |
29 | @property
30 | def url(self):
31 | return self.get_full_url()
32 |
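
Sketch: passing parameters urlencodes them into the request body, which turns the request into a POST; without parameters it stays a plain GET:

    from webvulnscan.request import Request

    get_req = Request(u'http://example.com/')
    post_req = Request(u'http://example.com/login', {u'user': u'admin'})
    print(get_req.data)   # None
    print(post_req.data)  # urlencoded body, e.g. b'user=admin'
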
--------------------------------------------------------------------------------
/webvulnscan/textarea.py:
--------------------------------------------------------------------------------
1 | DEFAULT_VALUE = "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, " \
2 | + "sed diam nonumy eirmod tempor invidunt ut labore et " \
3 | + "dolore magna aliquyam"
4 |
5 |
6 | class TextArea(object):
7 | def __init__(self, element):
8 | self.element = element
9 |
10 | def _get_attrib_value(self, name):
11 | value = self.element.attrib.get(name)
12 |
13 | if value:
14 | return value
15 |
16 | return ""
17 |
18 | @property
19 | def get_type(self):
20 | return "textarea"
21 |
22 | @property
23 | def get_name(self):
24 | return self._get_attrib_value('name')
25 |
26 | def guess_value(self):
27 | placeholder = self._get_attrib_value("placeholder")
28 | if placeholder == "":
29 | return DEFAULT_VALUE
30 | else:
31 | return placeholder
32 |
--------------------------------------------------------------------------------
/webvulnscan/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions described here are for Python 2/3 compatibility and other tasks.
3 | """
4 |
5 | from .compat import (
6 | urlparse, urlencode, urljoin, parse_qsl, parse_qs)
7 |
8 | import email.parser
9 | import io
10 | import json
11 | import re
12 | import sys
13 |
14 | # Safe content types (will not be rendered as a webpage by the browser)
15 | NOT_A_PAGE_CONTENT_TYPES = frozenset([
16 | 'text/plain',
17 | 'text/x-python',
18 | 'image/gif',
19 | 'image/jpeg',
20 | 'image/png',
21 | 'image/svg+xml',
22 | ])
23 | HTML_CONTENT_TYPES = frozenset([
24 | "text/html",
25 | "application/xhtml+xml",
26 | ])
27 |
28 |
29 | def parse_content_type(val, logfunc=None):
30 | if val:
31 | content_type, _, encoding = val.partition(";")
32 |
33 | if content_type in NOT_A_PAGE_CONTENT_TYPES:
34 | return (content_type, None)
35 |
36 | if content_type not in HTML_CONTENT_TYPES:
37 | if logfunc:
38 | logfunc(u'Strange content type', content_type)
39 |
40 | attrib_name, _, charset = encoding.partition('=')
41 | if attrib_name.strip() != "charset":
42 | if logfunc:
43 | logfunc(u'No Charset set')
44 | charset = 'utf-8'
45 | else:
46 | if logfunc:
47 | logfunc(u'No Content-Type header, assuming text/html')
48 | charset = 'utf-8'
49 | content_type = 'text/html'
50 |
51 | return (content_type, charset)
52 |
53 |
54 | def read_config(config_file, parser):
55 | with io.open(config_file, 'r', encoding='utf-8') as f:
56 | values = json.load(f)
57 |
58 | return values['options'], values['arguments']
59 |
60 |
61 | def write_json(obj, filename, **kwargs):
62 | if filename == u'-':
63 | out = sys.stdout
64 | else:
65 | if sys.version_info >= (3, 0):
66 | out = open(filename, 'w+', encoding='utf-8')
67 | else:
68 | # In Python 2.x, json.dump expects a bytestream
69 | out = open(filename, 'wb')
70 |
71 | with out:
72 | json.dump(obj, out, **kwargs)
73 |
74 |
75 | def write_config(filename, options, arguments):
76 | options_dict = options.__dict__.copy()
77 | del options_dict['write_config']
78 | del options_dict['read_config']
79 | write_json({"options": options_dict, "arguments": arguments}, filename,
80 | indent=4)
81 |
82 |
83 | def modify_parameter(parameters, target_name, value):
84 | res = parameters.copy()
85 | res[target_name] = value
86 | return res
87 |
88 |
89 | def change_parameter(url, parameter, new_value):
90 | """ Returns a new url where the parameter is changed. """
91 | url_query = urlparse(url).query
92 | query = dict(parse_qsl(url_query))
93 |
94 | if query:
95 | for name, _ in query.items():
96 | if name == parameter:
97 | query[name] = new_value
98 |
99 | encoded = "?" + urlencode(query)
100 | return urljoin(url, encoded)
101 | else:
102 | return url
103 |
104 |
105 | def get_url_host(url):
106 |     """ Returns the host part of a URL. """
107 | return urlparse(url).netloc
108 |
109 |
110 | def get_page_text(page):
111 | if page.document.text:
112 | yield page.document.text
113 |
114 | for element in page.document.findall('.//*'):
115 | if element.text:
116 | yield element.text
117 |
118 |
119 | def attack(searchfunc=None):
120 | if searchfunc is None:
121 | searchfunc = lambda page: [(page,)]
122 |
123 | def run(cls, client, log, page):
124 | for s in cls.search(page):
125 | cls.attack(client, log, *s)
126 |
127 | def decorator(attackfunc):
128 | return type(attackfunc.__name__, (object,), {
129 | 'attack': staticmethod(attackfunc),
130 | 'search': staticmethod(searchfunc),
131 | '__new__': run,
132 | })
133 | return decorator
134 |
135 |
136 | def could_be_secret(s):
137 | return len(s) >= 6 and re.match(r'^[0-9a-fA-F$!]+$', s)
138 |
139 |
140 | def get_param(url, pname):
141 | """ Return a GET parameter from a URL """
142 | return parse_qs(urlparse(url).query).get(pname, [u''])[0]
143 |
144 |
145 | def add_get_params(url, params):
146 | assert isinstance(params, dict)
147 |
148 |     params = dict((key, params[key].encode('ascii', 'ignore'))
149 |                   for key in params)
150 |
151 | return (url +
152 | (u'&' if u'?' in url else '?') +
153 | urlencode(params))
154 |
155 |
156 | def parse_http_headers(bs):
157 | assert isinstance(bs, bytes)
158 | s = bs.decode('utf-8')
159 | p = email.parser.Parser()
160 | res = p.parse(io.StringIO(s), headersonly=True)
161 | return res
162 |
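
The attack() decorator is the least obvious piece: it replaces the decorated function with a class whose __new__ runs the search generator over a page and invokes the original function once per hit, so every attack is later called like a plain function taking (client, log, page). A sketch with a stubbed search:

    from webvulnscan.utils import attack

    def search(page):
        # One target tuple per form on the page.
        for form in page.get_forms():
            yield (form,)

    @attack(search)
    def demo(client, log, form):
        log('info', form.action, u'demo', u'would attack this form')

    # Elsewhere: demo(client, log, page) walks every form on the page.
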
--------------------------------------------------------------------------------