├── core
│   ├── __init__.py
│   ├── crawl
│   │   ├── __init__.py
│   │   ├── lib
│   │   │   ├── __init__.py
│   │   │   ├── crawl_result.py
│   │   │   ├── shared.py
│   │   │   ├── urlfinder.py
│   │   │   ├── probe.py
│   │   │   └── utils.py
│   │   ├── probe
│   │   │   ├── .gitignore
│   │   │   ├── chrome_extension
│   │   │   │   ├── content.js
│   │   │   │   ├── manifest.json
│   │   │   │   ├── example_navigation_away_test_case.html
│   │   │   │   └── background.js
│   │   │   ├── package.json
│   │   │   ├── logger.js
│   │   │   ├── src
│   │   │   │   ├── constants.js
│   │   │   │   ├── utils.js
│   │   │   │   └── page-handler.js
│   │   │   ├── .eslintrc
│   │   │   └── index.js
│   │   └── crawler_thread.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── thirdparty
│   │   │   ├── __init__.py
│   │   │   ├── pysocks
│   │   │   │   ├── __init__.py
│   │   │   │   └── sockshandler.py
│   │   │   └── simhash
│   │   │       └── __init__.py
│   │   ├── exception.py
│   │   ├── shell.py
│   │   ├── request_pattern.py
│   │   ├── utils.py
│   │   ├── cookie.py
│   │   ├── request.py
│   │   ├── http_get.py
│   │   └── database.py
│   ├── scan
│   │   ├── __init__.py
│   │   ├── scanners
│   │   │   ├── __init__.py
│   │   │   ├── curl.py
│   │   │   ├── ck401.py
│   │   │   ├── sqlmap.py
│   │   │   ├── wapiti.py
│   │   │   └── arachni.py
│   │   ├── scanner.py
│   │   └── base_scanner.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── utilities
│   │   │   ├── __init__.py
│   │   │   ├── lsvuln.py
│   │   │   ├── updcookie.py
│   │   │   ├── lsajax.py
│   │   │   ├── usgen.py
│   │   │   ├── login.py
│   │   │   ├── htmlreport
│   │   │   │   ├── report.html
│   │   │   │   └── style.css
│   │   │   ├── login
│   │   │   │   └── login.js
│   │   │   └── report.py
│   │   ├── base_util.py
│   │   └── util.py
│   └── constants.py
├── requirements.txt
├── .gitignore
├── requirements-dev.txt
├── .travis.yml
├── scripts
│   ├── htmlreport.py
│   └── quickscan.sh
├── tests
│   ├── lib_tests
│   │   ├── shell_tests.py
│   │   ├── request_tests.py
│   │   └── database_tests.py
│   └── crawl_tests
│       ├── probe_tests.py
│       ├── crawler_tests.py
│       └── urlfinder_tests.py
├── htcap.py
└── README.md

/core/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/crawl/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/scan/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/util/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/crawl/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/lib/thirdparty/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/scan/scanners/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/core/util/utilities/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | HTMLParser==0.0.2
--------------------------------------------------------------------------------
/core/lib/thirdparty/pysocks/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 | tmp.py
3 | .idea
4 | *.db
5 | tmp
6 | /dist/
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | nose==1.3.7
2 | coverage==4.2
3 | mock==2.0.0
--------------------------------------------------------------------------------
/core/crawl/probe/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | /package-lock.json
3 | /yarn.lock
4 | /node_modules/
5 | /*.log
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: "python"
2 | python: "2.7"
3 | install: "pip install -r requirements.txt && pip install -r requirements-dev.txt"
4 | script: "nosetests"
--------------------------------------------------------------------------------
/core/crawl/probe/chrome_extension/content.js:
--------------------------------------------------------------------------------
1 | (function() {
2 |     'use strict';
3 |     // transmit the url received from the background page to the page
4 |     chrome.runtime.onMessage.addListener(function(msg) {
5 |         window.postMessage({from: 'javascript-probe', name: 'navigation-blocked', url: msg.url}, '*');
6 |     });
7 | })();
8 | 
--------------------------------------------------------------------------------
/core/crawl/probe/package.json:
--------------------------------------------------------------------------------
1 | {
2 |     "name": "javascript-probe",
3 |     "version": "1.0.0",
4 |     "main": "index.js",
5 |     "license": "GPL-2.0",
6 |     "dependencies": {
7 |         "argparse": "1.0.9",
8 |         "puppeteer": "1.2.0",
9 |         "winston": "2.4.0"
10 |     },
11 |     "devDependencies": {
12 |         "eslint": "4.8.0"
13 |     }
14 | }
15 | 
--------------------------------------------------------------------------------
/core/crawl/probe/chrome_extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 |     "manifest_version": 2,
3 |     "name": "JavaScript Probe extension - Navigation blocker",
4 |     "version": "1.0",
5 |     "license": "GPL-2.0",
6 |     "permissions": [
7 |         "webRequest",
8 |         "webRequestBlocking",
9 |         "<all_urls>",
10 |         "tabs"
11 |     ],
12 |     "background": {
13 |         "persistent": true,
14 |         "scripts": [
15 |             "background.js"
16 |         ]
17 |     }
18 | }
19 | 
--------------------------------------------------------------------------------
11 | """ 12 | 13 | 14 | class NotHtmlException(Exception): 15 | pass 16 | 17 | 18 | class RedirectException(Exception): 19 | pass 20 | 21 | 22 | class ThreadExitRequestException(Exception): 23 | pass 24 | 25 | # class MalformedUrlException(Exception): 26 | # pass 27 | -------------------------------------------------------------------------------- /core/crawl/lib/crawl_result.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 11 | """ 12 | 13 | 14 | class CrawlResult: 15 | def __init__(self, request, found_requests=None, errors=None): 16 | self.request = request 17 | self.found_requests = found_requests if found_requests else [] 18 | self.errors = errors if errors else [] 19 | -------------------------------------------------------------------------------- /core/crawl/lib/shared.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 11 | """ 12 | 13 | 14 | # TODO: make sure that only shared data are stored in this object 15 | 16 | class Shared: 17 | """ 18 | data shared between threads 19 | """ 20 | 21 | def __init__(self): 22 | pass 23 | 24 | main_condition = None 25 | th_condition = None 26 | 27 | requests = [] 28 | requests_index = 0 29 | crawl_results = [] 30 | 31 | start_url = "" 32 | start_cookies = [] 33 | end_cookies = [] 34 | allowed_domains = set() 35 | excluded_urls = set() 36 | 37 | probe_cmd = [] 38 | 39 | options = {} 40 | -------------------------------------------------------------------------------- /core/crawl/probe/chrome_extension/example_navigation_away_test_case.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | test 6 | 7 | 8 | 9 | 10 | pouet (should be blocked) 11 |

12 | pouet2 (should be blocked) 13 |

14 | pouet3 (should "navigate") 15 |

16 | 17 | 18 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/htmlreport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | """ 6 | HTCAP - beta 1 7 | Author: filippo.cavallarin@wearesegment.com 8 | 9 | This program is free software; you can redistribute it and/or modify it under 10 | the terms of the GNU General Public License as published by the Free Software 11 | Foundation; either version 2 of the License, or (at your option) any later 12 | version. 13 | """ 14 | 15 | import sys 16 | import os 17 | import sqlite3 18 | import json 19 | from urlparse import urlsplit 20 | import glob 21 | import importlib 22 | 23 | reload(sys) 24 | sys.setdefaultencoding('utf8') 25 | 26 | sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__)) + os.sep + "..")) 27 | 28 | print "* WARNING: this script is here for back compatibility reasons and will be removed soon!!\n* Use 'htcap util report' instead" 29 | 30 | mod = importlib.import_module("core.util.utilities.report") 31 | run = getattr(mod, "Report") 32 | run(['report'] + sys.argv[1::]) 33 | -------------------------------------------------------------------------------- /core/crawl/probe/logger.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | 'use strict'; 3 | const winston = require('winston'); 4 | 5 | let outputLogger = new winston.Logger({ 6 | transports: [ 7 | new (winston.transports.Console)( 8 | { 9 | formatter: (options) => { 10 | return options.message; 11 | }, 12 | }, 13 | ), 14 | ], 15 | exitOnError: true, 16 | }); 17 | 18 | let debugLogger = new winston.Logger({ 19 | transports: [ 20 | new (winston.transports.File)( 21 | { 22 | level: 'debug', 23 | filename: __dirname + '/debug.log', 24 | prettyPrint: true, 25 | timestamp: true, 26 | json: false, 27 | }, 28 | ), 29 | ], 30 | exceptionHandlers: [ 31 | new (winston.transports.Console)({json: false, timestamp: true, prettyPrint: true}), 32 | ], 33 | }); 34 | module.exports = {output: outputLogger, debug: debugLogger}; 35 | 36 | })(); 37 | -------------------------------------------------------------------------------- /core/scan/scanners/curl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
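curl.py above is effectively the reference scanner module: scanner.py (further down) discovers any `*.py` file in core/scan/scanners/ and instantiates the class named after the file, title-cased. As a sketch of how an extra check could plug in, here is a hypothetical `core/scan/scanners/xfo.py` that flags responses missing the `X-Frame-Options` header; the module name, vulnerability type string and header check are illustrative, while the hook methods are exactly the ones curl.py implements:

```python
# Hypothetical core/scan/scanners/xfo.py -- a sketch, not part of htcap.
from __future__ import unicode_literals

import re

from core.scan.base_scanner import BaseScanner


class Xfo(BaseScanner):
    def init(self, argv):
        return True

    def get_settings(self):
        return dict(
            request_types="link,redirect",
            num_threads=10,
            process_timeout=20,
            scanner_exe="/usr/bin/env curl"
        )

    def get_cmd(self, request, tmp_dir):
        # fetch headers only; returning False here would skip the request
        return ["-I", request.url]

    def scanner_executed(self, request, out, err, tmp_dir, cmd):
        if not re.search("^X-Frame-Options:", out, re.M):
            self.save_vulnerability(request, "xfo-missing",
                                    "X-Frame-Options header is not set")
```

If the discovery logic treats it like the bundled modules, this would run as `python htcap.py scan xfo target.db`.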
/core/util/base_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from __future__ import unicode_literals
14 | import sys
15 | import getopt
16 | 
17 | class BaseUtil:
18 | 
19 |     @staticmethod
20 |     def get_settings():
21 |         return dict(
22 |             descr = "",
23 |             optargs = '',
24 |             minargs = 0
25 |         )
26 | 
27 |     def usage(self):
28 |         return (
29 |             "%s\n"
30 |             "usage: %s\n"
31 |             % (self.get_settings()['descr'], self.utilname)
32 |         )
33 | 
34 |     def __init__(self, argv):
35 |         self.utilname = argv[0]
36 |         settings = self.get_settings()
37 | 
38 |         if len(argv) < (settings['minargs'] + 1):
39 |             print self.usage()
40 |             sys.exit(1)
41 | 
42 |         try:
43 |             opts, args = getopt.getopt(argv[1:], settings['optargs'])
44 |         except getopt.GetoptError as err:
45 |             print str(err)
46 |             sys.exit(1)
47 | 
48 |         self.main(args, opts)
--------------------------------------------------------------------------------
/core/util/utilities/lsvuln.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | import sqlite3
4 | import json
5 | 
6 | from core.util.base_util import BaseUtil
7 | 
8 | reload(sys)
9 | sys.setdefaultencoding('utf8')
10 | 
11 | class Lsvuln(BaseUtil):
12 | 
13 |     @staticmethod
14 |     def get_settings():
15 |         return dict(
16 |             descr = "List all vulnerabilities",
17 |             optargs = '',
18 |             minargs = 1
19 |         )
20 | 
21 |     def usage(self):
22 |         return (
23 |             "%s\n"
24 |             "usage: %s <dbfile> [<where-clause>]\n"
25 |             % (self.get_settings()['descr'], self.utilname)
26 |         )
27 | 
28 |     def main(self, args, opts):
29 |         qry = """
30 |             SELECT scanner,start_date,end_date,id_request,type,description FROM assessment a
31 |             INNER JOIN vulnerability av ON a.id=av.id_assessment
32 |             WHERE
33 |             %s
34 |         """
35 | 
36 |         dbfile = args[0]
37 |         where = args[1] if len(args) > 1 else "1=1"
38 | 
39 |         conn = sqlite3.connect(dbfile)
40 |         conn.row_factory = sqlite3.Row
41 | 
42 |         cur = conn.cursor()
43 |         cur.execute(qry % where)
44 |         for vuln in cur.fetchall():
45 |             print vuln['description']
46 |             print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - "
47 | 
--------------------------------------------------------------------------------
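A hedged invocation example for the utility above: `target.db` stands for a database produced by a previous crawl/scan, and the optional second argument is interpolated verbatim into the SQL WHERE clause (so it must be trusted input):

```console
python htcap.py util lsvuln target.db "type='xss'"
```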
tests/lib_tests/shell_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import time
3 | import os
4 | import subprocess
5 | from mock import DEFAULT, patch
6 | from core.lib.shell import CommandExecutor
7 | 
8 | 
9 | class ExecutorTest(unittest.TestCase):
10 | 
11 |     @patch('core.lib.shell.subprocess')
12 |     def test_command_responds(self, mock_process):
13 |         mock_process.Popen.side_effect = lambda *args, **kwargs: time.sleep(1) or DEFAULT  # slow start; DEFAULT keeps the mocked return_value
14 |         mock_process.Popen.return_value.communicate.return_value = ('hurray', 'err')
15 | 
16 |         executor = CommandExecutor(['cmd'])
17 |         result = executor.execute(2)
18 | 
19 |         self.assertEqual(result, "hurray")
20 | 
21 | 
22 |     def test_command_timeout_with_results(self):
23 |         cmd = ['tail', '-f', os.path.realpath(__file__)]
24 |         executor = CommandExecutor(cmd, stderr=True)
25 |         result = executor.execute(1)
26 | 
27 |         self.assertIn("result", result[0])
28 | 
29 |     @patch.object(subprocess.Popen, 'communicate')
30 |     def test_command_timeout_with_errors(self, mock_comm):
31 |         mock_comm.return_value = (None, "error")
32 |         cmd = ['sleep', '10']
33 |         executor = CommandExecutor(cmd, stderr=True)
34 |         result = executor.execute(1)
35 | 
36 |         self.assertEqual("error", result[1])
--------------------------------------------------------------------------------
/core/util/util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from __future__ import unicode_literals
14 | import sys
15 | import importlib
16 | from glob import glob
17 | from core.lib.utils import *
18 | 
19 | class Util:
20 | 
21 |     def get_mod(self, path, name):
22 |         mod = importlib.import_module("%s.%s" % (path, name))
23 |         return getattr(mod, name.title())
24 | 
25 | 
26 |     def __init__(self, argv):
27 |         util = argv[0] if len(argv) >= 1 else ""
28 |         mp = "core.util.utilities"
29 |         fp = "%s%sutilities" % (getrealdir(__file__), os.sep)
30 |         utils = [os.path.basename(m).split(".")[0] for m in glob(os.path.join(fp, '[a-z]*[a-z].py'))]
31 | 
32 |         if util not in utils:
33 |             utils.sort()
34 |             print "Available utilities are:"
35 |             for u in utils:
36 |                 run = self.get_mod(mp, u)
37 |                 print " %s%s%s" % (u, " "*(20 - len(u)), run.get_settings()['descr'].split("\n")[0])
38 |             sys.exit(1)
39 | 
40 |         run = self.get_mod(mp, util)
41 |         run([util] + argv[1:])
--------------------------------------------------------------------------------
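util.py discovers utilities by globbing core/util/utilities/ and instantiating the class named after the file (title-cased), so a new utility only needs to subclass BaseUtil. A minimal hypothetical example; the file name lsreq.py, the description and the query are made up for illustration, though the `request` table and its `method`/`url` columns appear in the bundled utilities:

```python
# Hypothetical core/util/utilities/lsreq.py -- a sketch, not part of htcap.
import sqlite3

from core.util.base_util import BaseUtil


class Lsreq(BaseUtil):

    @staticmethod
    def get_settings():
        return dict(
            descr="List all crawled requests",
            optargs='',
            minargs=1
        )

    def main(self, args, opts):
        # BaseUtil.__init__ has already parsed argv into (args, opts)
        conn = sqlite3.connect(args[0])
        for method, url in conn.execute("SELECT method, url FROM request"):
            print("%s %s" % (method, url))
```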
12 | """ 13 | 14 | from __future__ import unicode_literals 15 | import sys 16 | import os 17 | import datetime 18 | import time 19 | import getopt 20 | 21 | from core.lib.utils import * 22 | from core.crawl.crawler import Crawler 23 | from core.scan.scanner import Scanner 24 | 25 | from core.util.util import Util 26 | 27 | reload(sys) 28 | sys.setdefaultencoding('utf8') 29 | 30 | 31 | def usage(): 32 | infos = get_program_infos() 33 | print ("htcap ver " + infos['version'] + "\n" 34 | "usage: htcap \n" 35 | "Commands: \n" 36 | " crawl run crawler\n" 37 | " scan run scanner\n" 38 | " util run utility\n" 39 | ) 40 | 41 | 42 | if __name__ == '__main__': 43 | 44 | if len(sys.argv) < 2: 45 | usage() 46 | sys.exit(1) 47 | 48 | elif sys.argv[1] == "crawl": 49 | crawler = Crawler(sys.argv[2:]) 50 | crawler.run() 51 | elif sys.argv[1] == "scan": 52 | Scanner(sys.argv[2:]) 53 | elif sys.argv[1] == "util": 54 | Util(sys.argv[2:]) 55 | else: 56 | usage(); 57 | sys.exit(1) 58 | 59 | sys.exit(0) 60 | -------------------------------------------------------------------------------- /core/util/utilities/updcookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | import sqlite3 4 | import json 5 | import getopt 6 | import os 7 | 8 | from core.util.base_util import BaseUtil 9 | 10 | reload(sys) 11 | sys.setdefaultencoding('utf8') 12 | 13 | class Updcookie(BaseUtil): 14 | 15 | @staticmethod 16 | def get_settings(): 17 | return dict( 18 | descr = "Update the value of a cookie of saved requests", 19 | optargs = '', 20 | minargs = 3 21 | ) 22 | 23 | def usage(self): 24 | return ( 25 | "%s\n" 26 | "usage: %s []\n" 27 | % (self.get_settings()['descr'], self.utilname) 28 | ) 29 | 30 | 31 | def main(self, argv): 32 | qry = """ 33 | SELECT id, cookies 34 | FROM request 35 | WHERE %s 36 | """ 37 | 38 | dbfile = args[0] 39 | cname = args[1] 40 | cvalue = args[2] 41 | 42 | if not os.path.exists(dbfile): 43 | print "No such file %s" % dbfile 44 | sys.exit(1) 45 | 46 | where = args[3] if len(args) > 3 else "1=1" 47 | 48 | conn = sqlite3.connect(dbfile) 49 | conn.row_factory = sqlite3.Row 50 | 51 | cur = conn.cursor() 52 | wcur = conn.cursor() 53 | cur.execute(qry % where) 54 | pages = {} 55 | for res in cur.fetchall(): 56 | cookies = res['cookies'] 57 | if cookies: 58 | #print cookies 59 | cookies = json.loads(cookies) 60 | for cookie in cookies: 61 | if cookie['name'] == cname: 62 | cookie['value'] = cvalue 63 | wcur.execute("update request set cookies=? where id=?",(json.dumps(cookies), res['id'])) 64 | 65 | conn.commit() 66 | cur.close() 67 | wcur.close() 68 | conn.close() 69 | -------------------------------------------------------------------------------- /core/scan/scanners/ck401.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | import re 16 | from core.scan.base_scanner import BaseScanner 17 | 18 | 19 | class Ck401(BaseScanner): 20 | def init(self, argv): 21 | return True 22 | 23 | def get_settings(self): 24 | return dict( 25 | request_types = "link,redirect,xhr,form", 26 | num_threads = 10, 27 | process_timeout = 20 , 28 | scanner_exe = "/usr/bin/env curl" 29 | ) 30 | 31 | def get_cmd(self, request, tmp_dir): 32 | #cookies = ["%s=%s" % (c.name,c.value) for c in request.cookies] 33 | #cookies_str = " -H 'Cookie: %s'" % " ;".join(cookies) if len(cookies) > 0 else "" 34 | method = ["-X", "POST"] if request.method == "POST" else [] 35 | referer = ["-H", "'Referer: %s'" % request.referer] if request.referer else [] 36 | data = ["--data", "'%s'" % request.data] if request.data else [] 37 | 38 | 39 | cmd = [ "-i" ] + referer + method + data + [request.url] 40 | # print " ".join(cmd) 41 | # return False 42 | 43 | return cmd 44 | 45 | def scanner_executed(self, request, out, err, tmp_dir, cmd): 46 | if not re.search("^HTTP/1.1 401 Unauthorized", out) and not re.search("action\[login\]",out): 47 | self.save_vulnerability(request, "cross-session", " ".join(cmd)+ "\n" + out) 48 | -------------------------------------------------------------------------------- /core/crawl/lib/urlfinder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
11 | """ 12 | 13 | import re 14 | from HTMLParser import HTMLParser 15 | from urlparse import urljoin, urlparse 16 | 17 | 18 | class UrlFinder: 19 | def __init__(self, html): 20 | self.html = html 21 | 22 | def get_urls(self): 23 | 24 | try: 25 | parser = UrlHTMLParser() 26 | parser.feed(self.html) 27 | except: 28 | raise 29 | 30 | return parser.urls 31 | 32 | 33 | class UrlHTMLParser(HTMLParser): 34 | def __init__(self): 35 | 36 | HTMLParser.__init__(self) 37 | self.base_url = "" 38 | self.urls = [] 39 | 40 | def handle_starttag(self, tag, attrs): 41 | # more info about the tag: https://www.w3.org/wiki/HTML/Elements/base 42 | if tag == "base": 43 | for key, val in attrs: 44 | if key == "href": 45 | self.base_url = urlparse(val.strip()).geturl() 46 | 47 | elif tag == "a": 48 | for key, val in attrs: 49 | if key == "href": 50 | if re.match("^https?://", val, re.I): 51 | self.urls.extend([val]) 52 | elif not re.match("^[a-z]+:", val, re.I) and not val.startswith("#"): 53 | self.urls.extend([urljoin(self.base_url, val)]) 54 | -------------------------------------------------------------------------------- /tests/lib_tests/request_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mock import call, patch 4 | 5 | from core.lib.request import Request 6 | 7 | 8 | class RequestTestCase(unittest.TestCase): 9 | @patch('core.lib.request.remove_tokens') 10 | def test___eq__(self, remove_tokens_mock): 11 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 12 | b = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 13 | self.assertTrue(a == b) 14 | 15 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 16 | b = Request("type2", "method1", "url1", data="data1", http_auth="auth1") 17 | self.assertFalse(a == b) 18 | 19 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 20 | b = Request("type1", "method2", "url1", data="data1", http_auth="auth1") 21 | self.assertFalse(a == b) 22 | 23 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 24 | b = Request("type1", "method1", "url2", data="data1", http_auth="auth1") 25 | self.assertFalse(a == b) 26 | 27 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 28 | b = Request("type1", "method1", "url1", data="data2", http_auth="auth1") 29 | self.assertFalse(a == b) 30 | 31 | a = Request("type1", "method1", "url1", data="data1", http_auth="auth1") 32 | b = Request("type1", "method1", "url1", data="data1", http_auth="auth2") 33 | self.assertFalse(a == b) 34 | 35 | a = Request("type1", "method1", "url1") 36 | b = None 37 | self.assertFalse(a == b) 38 | self.assertEqual(remove_tokens_mock.call_count, 0) 39 | 40 | @patch('core.lib.request.remove_tokens', return_value="some data") 41 | def test___eq__with_post(self, remove_tokens_mock): 42 | a = Request("type1", "POST", "url1", data="dataXXXX") 43 | b = Request("type1", "POST", "url1", data="dataYYYY") 44 | 45 | self.assertTrue(a == b) 46 | self.assertEqual(remove_tokens_mock.call_args_list, [call("dataXXXX"), call("dataYYYY")]) 47 | -------------------------------------------------------------------------------- /core/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of 
the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | THSTAT_WAITING = 0
14 | THSTAT_RUNNING = 1
15 | 
16 | CRAWLSCOPE_DOMAIN = "domain"
17 | CRAWLSCOPE_DIRECTORY = "directory"
18 | CRAWLSCOPE_URL = "url"
19 | 
20 | CRAWLOUTPUT_RENAME = "rename"
21 | CRAWLOUTPUT_OVERWRITE = "overwrite"
22 | CRAWLOUTPUT_RESUME = "resume"
23 | CRAWLOUTPUT_COMPLETE = "complete"
24 | 
25 | CRAWLMODE_PASSIVE = "passive"
26 | CRAWLMODE_ACTIVE = "active"
27 | CRAWLMODE_AGGRESSIVE = "aggressive"
28 | 
29 | REQTYPE_LINK = "link"
30 | REQTYPE_XHR = "xhr"
31 | REQTYPE_WS = "websocket"
32 | REQTYPE_JSONP = "jsonp"
33 | REQTYPE_FORM = "form"
34 | REQTYPE_REDIRECT = "redirect"
35 | REQTYPE_UNKNOWN = "unknown"
36 | 
37 | ERROR_CONTENTTYPE = "contentType"
38 | ERROR_TIMEOUT = "timeout"
39 | ERROR_PROBE_TO = "probe_timeout"
40 | ERROR_FORCE_STOP = "interruptReceived"
41 | ERROR_PROBEKILLED = "probe_killed"
42 | ERROR_PROBEFAILURE = "probe_failure"
43 | ERROR_MAXREDIRECTS = "too_many_redirects"
44 | ERROR_CRAWLDEPTH = "crawler_depth_limit_reached"
45 | VULNTYPE_SQLI = "sqli"
46 | VULNTYPE_XSS = "xss"
47 | 
48 | CRAWLER_DEFAULTS = {
49 |     "process_timeout": 300,  # when lots of elements (~25000) are added dynamically it can take some time..
50 |     "num_threads": 10,
51 |     "max_redirects": 10,
52 |     "max_depth": 100,
53 |     "max_post_depth": 10,
54 |     "output_mode": CRAWLOUTPUT_RENAME,
55 |     "scope": CRAWLSCOPE_DOMAIN,
56 |     "mode": CRAWLMODE_AGGRESSIVE,
57 |     "proxy": None,
58 |     "group_qs": False,
59 |     "user_agent": 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
60 |                   '(KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
61 |     "override_timeout_functions": True,
62 |     "crawl_forms": True,  # only if mode == CRAWLMODE_AGGRESSIVE
63 |     "random_seed": "",
64 |     "use_urllib_onerror": True,
65 |     "set_referer": True,
66 | }
--------------------------------------------------------------------------------
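The tests further down (tests/crawl_tests/probe_tests.py) seed the crawler's shared option dict from CRAWLER_DEFAULTS; a condensed sketch of that pattern, not htcap's actual startup code (crawler.py is not included in this dump):

```python
from core.constants import CRAWLER_DEFAULTS, CRAWLSCOPE_DIRECTORY
from core.crawl.lib.shared import Shared

# start from the defaults, then override whatever the command line changed
Shared.options.update(CRAWLER_DEFAULTS)
Shared.options['scope'] = CRAWLSCOPE_DIRECTORY
Shared.options['num_threads'] = 5
```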
/core/lib/shell.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | import subprocess
14 | import sys
15 | import threading
16 | import time
17 | 
18 | 
19 | class CommandExecutor:
20 |     """
21 |     Executes a shell command and returns its output.
22 |     The process is killed after <timeout> seconds.
23 |     """
24 | 
25 |     def __init__(self, cmd, stderr=False):
26 |         # self.cmd = cmd
27 |         self.cmd = [c.encode("utf-8") for c in cmd]
28 |         self.stderr = stderr
29 |         self.out = None
30 |         self.err = None
31 |         self.process = None
32 |         self.thread = None
33 |         self.result = None
34 | 
35 |     def close(self, kill_timeout):
36 |         tries = 0
37 |         self.process.terminate()
38 |         while tries < kill_timeout:
39 |             if self.process.poll() is not None:
40 |                 return
41 |             else:
42 |                 time.sleep(1)
43 |                 tries += 1
44 |         self.process.kill()
45 |         self.thread.join()
46 |         self.out = None
47 |         self.err = "Executor: execution timeout"
48 | 
49 |     def execute(self, timeout):
50 | 
51 |         def executor():
52 |             try:
53 |                 # close_fds=True is needed in threaded programs
54 | 
55 |                 self.process = subprocess.Popen(self.cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=0,
56 |                                                 close_fds=sys.platform != "win32")
57 |                 self.out, self.err = self.process.communicate()
58 |             except Exception as e:
59 |                 raise
60 | 
61 |         self.thread = threading.Thread(target=executor)
62 |         self.thread.start()
63 | 
64 |         self.thread.join(int(timeout))
65 | 
66 |         if self.thread.is_alive():
67 |             self.close(5)
68 | 
69 |         return self.out if not self.stderr else (self.out, self.err)
--------------------------------------------------------------------------------
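Typical use of CommandExecutor, matching how tests/lib_tests/shell_tests.py drives it: execute() returns stdout only by default, or an (out, err) pair when stderr=True, and on timeout out is None and err is the "Executor: execution timeout" string. The curl command line here is just an example:

```python
from core.lib.shell import CommandExecutor

executor = CommandExecutor(["curl", "-I", "http://example.com"], stderr=True)
out, err = executor.execute(30)  # kill the process after 30 seconds
if out is None:
    print("command failed: %s" % err)
```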
/core/crawl/lib/probe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from core.constants import *
14 | from core.lib.cookie import Cookie
15 | from core.lib.request import Request
16 | 
17 | 
18 | class Probe:
19 |     def __init__(self, data, parent):
20 |         self.status = "ok"
21 |         self.requests = []
22 |         self.cookies = []
23 |         self.redirect = None
24 |         # if True the probe returned no error BUT the json is not closed properly
25 |         self.partialcontent = False
26 |         self.user_output = []
27 | 
28 |         status = data.pop()
29 | 
30 |         if status['status'] == "error":
31 |             self.status = "error"
32 |             self.errcode = status['code']
33 | 
34 |         if "partialcontent" in status:
35 |             self.partialcontent = status['partialcontent']
36 | 
37 |         # grab cookies before creating requests
38 |         for key, val in data:
39 |             if key == "cookies":
40 |                 for cookie in val:
41 |                     self.cookies.append(Cookie(cookie, parent.url))
42 | 
43 |         if "redirect" in status:
44 |             self.redirect = status['redirect']
45 |             r = Request(REQTYPE_REDIRECT, "GET", self.redirect, parent=parent, set_cookie=self.cookies,
46 |                         parent_db_id=parent.db_id)
47 |             self.requests.append(r)
48 | 
49 |         for key, val in data:
50 |             if key == "request":
51 |                 trigger = val['trigger'] if 'trigger' in val else None
52 |                 r = Request(val['type'], val['method'], val['url'], parent=parent, set_cookie=self.cookies,
53 |                             data=val['data'], trigger=trigger, parent_db_id=parent.db_id)
54 |                 self.requests.append(r)
55 |             elif key == "user":
56 |                 self.user_output.append(val)
57 | 
58 | 
59 | 
60 | # @TODO handle cookies set by ajax (in probe too)
--------------------------------------------------------------------------------
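The shape of the `data` array Probe expects, inferred from the parser above: a list of [key, value] pairs with a status object as the last element (the constructor pops it before iterating). The field names are the ones the code reads; the URLs and cookie fields are made up, the cookie dict is a guess at what core.lib.cookie.Cookie accepts, and a bare Request is assumed to default its db_id:

```python
from core.crawl.lib.probe import Probe
from core.lib.request import Request

parent = Request("link", "GET", "http://example.com/")
data = [
    ["cookies", [{"name": "session", "value": "abc"}]],
    ["request", {"type": "xhr", "method": "POST",
                 "url": "http://example.com/api", "data": "q=1"}],
    {"status": "ok"},
]
probe = Probe(data, parent)  # probe.requests now holds one xhr Request
```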
/README.md:
--------------------------------------------------------------------------------
1 | ## HTCAP
2 | 
3 | Htcap is a web application scanner able to crawl single-page applications (SPAs) recursively by intercepting XHR calls and DOM changes.
4 | Htcap is not just another vulnerability scanner: it focuses mainly on the crawling process and uses external tools to discover vulnerabilities. It's designed to be a tool for both manual and automated penetration testing of modern web applications.
5 | 
6 | More info at [htcap.org](http://htcap.org).
7 | 
8 | ### Differences from the upstream version
9 | 
10 | * Use Chrome + Puppeteer instead of PhantomJS as the crawl engine
11 | * Add an option to restart/complete a crawl
12 | * Rewrite the injected code of the JavaScript crawler to take the [JavaScript event loop](https://www.youtube.com/watch?v=8aGhZQkoFbQ) into account (i.e. JavaScript is async, so stop relying on `setTimeout` calls) and make use of a [DOM mutation observer](https://developer.mozilla.org/en-US/docs/Web/API/MutationObserver)
13 | * Drop the flimsily supported "custom user script" feature in the crawler
14 | * Add unit tests for the crawler
15 | * Mainly, this fixes (among others) issues [#9](https://github.com/segment-srl/htcap/issues/9), [#11](https://github.com/segment-srl/htcap/issues/11), [#16](https://github.com/segment-srl/htcap/issues/16), [#19](https://github.com/segment-srl/htcap/issues/19), [#22](https://github.com/segment-srl/htcap/issues/22), [#23](https://github.com/segment-srl/htcap/issues/23), [#28](https://github.com/segment-srl/htcap/issues/28) and [#31](https://github.com/segment-srl/htcap/issues/31)
16 | 
17 | ## SETUP
18 | 
19 | ### Requirements
20 | 
21 | 1. Python 2.7
22 | 2. NodeJS v8.9.4 (for the crawler)
23 | 3. Sqlmap (for the sqlmap scanner module)
24 | 4. Arachni (for the arachni scanner module)
25 | 
26 | ### Installation
27 | 
28 | ```console
29 | git clone git@github.com:delvelabs/htcap.git htcap
30 | cd htcap
31 | pip install -r requirements.txt
32 | cd core/crawl/probe/
33 | npm install
34 | ```
35 | 
36 | ## Documentation
37 | Try `python htcap.py -h` for help
38 | 
39 | ## LICENSE
40 | 
41 | This program is free software; you can redistribute it and/or modify it under the terms of the [GNU General Public License](https://www.gnu.org/licenses/gpl-2.0.html) as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
42 | 
--------------------------------------------------------------------------------
/core/util/utilities/lsajax.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | import sqlite3
4 | import json
5 | import os
6 | 
7 | from core.util.base_util import BaseUtil
8 | 
9 | reload(sys)
10 | sys.setdefaultencoding('utf8')
11 | 
12 | 
13 | class Lsajax(BaseUtil):
14 | 
15 |     @staticmethod
16 |     def get_settings():
17 |         return dict(
18 |             descr = "List all pages and related ajax calls",
19 |             optargs = 'd',
20 |             minargs = 1
21 |         )
22 | 
23 |     def usage(self):
24 |         return (
25 |             "usage: %s <dbfile> [<where-clause>]\n"
26 |             " Options:\n  -d   print POST data\n\n"
27 |             % self.utilname
28 |         )
29 | 
30 |     def main(self, args, opts):
31 |         qry = """
32 |             SELECT r.id, r.url as page, r.referer, a.method, a.url,a.data,a.trigger
33 |             FROM request r inner join request a on r.id=a.id_parent
34 |             WHERE (a.type='xhr')
35 |             AND
36 |             %s
37 |         """
38 | 
39 |         print_post_data = False
40 | 
41 |         for o, v in opts:
42 |             if o == '-d':
43 |                 print_post_data = True
44 | 
45 |         dbfile = args[0]
46 | 
47 |         if not os.path.exists(dbfile):
48 |             print "No such file %s" % dbfile
49 |             sys.exit(1)
50 | 
51 |         where = args[1] if len(args) > 1 else "1=1"
52 | 
53 |         conn = sqlite3.connect(dbfile)
54 |         conn.row_factory = sqlite3.Row
55 | 
56 |         cur = conn.cursor()
57 |         cur.execute(qry % where)
58 |         pages = {}
59 |         for res in cur.fetchall():
60 |             page = (res['id'], res['page'], res['referer'])
61 |             trigger = json.loads(res['trigger']) if res['trigger'] else None
62 |             trigger_str = "%s.%s() -> " % (trigger['element'], trigger['event']) if trigger else ""
63 |             data = " data: %s" % (res['data']) if print_post_data and res['data'] else ""
64 |             descr = " %s%s %s%s" % (trigger_str, res['method'], res['url'], data)
65 | 
66 |             if page in pages:
67 |                 pages[page].append(descr)
68 |             else:
69 |                 pages[page] = [descr]
70 | 
71 |         for page, ajax in pages.items():
72 |             print "Request ID: %s\nPage URL: %s\nReferer: %s\nAjax requests:" % page
73 |             for aj in ajax:
74 |                 print aj
75 |             print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \n"
--------------------------------------------------------------------------------
tests/crawl_tests/probe_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mock
3 | 
4 | from core.constants import CRAWLER_DEFAULTS
5 | from core.crawl.crawler import Crawler
6 | from core.crawl.crawler_thread import CrawlerThread
7 | from core.crawl.lib.shared
import Shared 8 | from core.lib.request import Request 9 | 10 | 11 | class SetProbeTest(unittest.TestCase): 12 | def setup_shared_object(self, 13 | mode=CRAWLER_DEFAULTS['mode'], 14 | timeout=CRAWLER_DEFAULTS['process_timeout'], 15 | user_agent=CRAWLER_DEFAULTS['user_agent'], 16 | proxy=CRAWLER_DEFAULTS['proxy'], 17 | seed=CRAWLER_DEFAULTS['random_seed'], 18 | override=CRAWLER_DEFAULTS['override_timeout_functions'], 19 | excluded='', 20 | ): 21 | Shared.excluded_urls = excluded 22 | Shared.options['random_seed'] = seed 23 | Shared.options['proxy'] = proxy 24 | Shared.options['mode'] = mode 25 | Shared.options['process_timeout'] = timeout 26 | Shared.options['user_agent'] = user_agent 27 | Shared.options['override_timeout_functions'] = override 28 | 29 | @mock.patch('core.crawl.crawler.get_probe_cmd', return_value=['/usr/bin/node']) 30 | def test_setting_probe_calls_node(self, mock_probe_cmd): 31 | args = ['http://example.com', 'out.txt'] 32 | crawler = Crawler(args) 33 | self.setup_shared_object() 34 | crawler._set_probe() 35 | self.assertIn("index.js", crawler._probe["cmd"][1]) 36 | self.assertIn('node', crawler._probe["cmd"][0]) 37 | 38 | @mock.patch('core.crawl.crawler.get_probe_cmd', return_value=['/usr/bin/node']) 39 | def test_set_probe_puts_proxy_in_options(self, mock_probe_cmd): 40 | args = ['http://example.com', 'out.txt'] 41 | crawler = Crawler(args) 42 | self.setup_shared_object(proxy={'proto': 'http', 'host': '254.254.254.254', 'port': '1'}) 43 | crawler._set_probe() 44 | 45 | self.assertIn('--proxy=http://254.254.254.254:1', crawler._probe["options"]) 46 | self.assertEqual(len(crawler._probe["cmd"]), 2) 47 | 48 | 49 | class SendProbeTest(unittest.TestCase): 50 | def setup_request_object(self): 51 | pass 52 | 53 | def test_set_params_for_probe(self): 54 | req = Request("type1", "POST", "http://example.com", data="example data", http_auth="auth1") 55 | Shared.options['set_referer'] = None 56 | thread = CrawlerThread() 57 | params = thread._set_probe_params(req) 58 | print(req) 59 | self.assertIn("http://example.com/", params) 60 | pass 61 | -------------------------------------------------------------------------------- /tests/crawl_tests/crawler_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mock import patch 4 | 5 | from core.constants import * 6 | from core.crawl.crawler import Crawler 7 | 8 | 9 | class CrawlerTest(unittest.TestCase): 10 | @patch('core.crawl.crawler.generate_filename', return_value='my_out_file-1') 11 | @patch('core.crawl.crawler.Database') 12 | def test__get_database_rename_outfile(self, database_mock, generate_filename_mock): 13 | db = Crawler._get_database('my_out_file', CRAWLOUTPUT_RENAME) 14 | 15 | generate_filename_mock.assert_called_once_with('my_out_file', out_file_overwrite=False) 16 | database_mock.assert_called_once_with('my_out_file-1') 17 | db.initialize.assert_called_once() 18 | 19 | @patch('core.crawl.crawler.Database') 20 | @patch('core.crawl.crawler.os.path.exists', return_value=True) 21 | @patch('core.crawl.crawler.os.path.getsize', return_value=0) 22 | @patch('core.crawl.crawler.os.remove') 23 | def test__get_database_overwrite_outfile( 24 | self, 25 | os_remove_mock, 26 | os_path_getsize_mock, 27 | os_path_exists_mock, 28 | database_mock): 29 | db = Crawler._get_database('my_out_file', CRAWLOUTPUT_OVERWRITE) 30 | 31 | os_path_getsize_mock.assert_called_with('my_out_file') 32 | os_path_exists_mock.assert_called_with('my_out_file') 33 | 
self.assertEqual(os_path_exists_mock.call_count, 3)
33 |         os_remove_mock.assert_called_once_with('my_out_file')
34 |         database_mock.assert_called_once_with('my_out_file')
35 |         db.initialize.assert_called_once()
36 | 
37 |     @patch('core.crawl.crawler.Database')
38 |     @patch('core.crawl.crawler.os.path.exists', return_value=True)
39 |     @patch('core.crawl.crawler.os.path.getsize', return_value=2)
40 |     def test__get_database_complete_outfile(self, os_path_getsize_mock, os_path_exists_mock, database_mock):
41 |         db = Crawler._get_database('my_out_file', CRAWLOUTPUT_COMPLETE)
42 | 
43 |         database_mock.assert_called_once_with('my_out_file')
44 |         os_path_getsize_mock.assert_called_with('my_out_file')
45 |         os_path_exists_mock.assert_called_with('my_out_file')
46 |         self.assertEqual(os_path_exists_mock.call_count, 2)
47 |         self.assertEqual(db.initialize.call_count, 0)
48 | 
49 |     @patch('core.crawl.crawler.Database')
50 |     @patch('core.crawl.crawler.os.path.exists', return_value=False)
51 |     def test__get_database_resume_new_outfile(self, os_path_exists_mock, database_mock):
52 |         db = Crawler._get_database('my_out_file', CRAWLOUTPUT_RESUME)
53 | 
54 |         database_mock.assert_called_once_with('my_out_file')
55 |         os_path_exists_mock.assert_called_once_with('my_out_file')
56 |         self.assertEqual(db.initialize.call_count, 1)
57 | 
--------------------------------------------------------------------------------
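The test modules above (together with the probe tests earlier) are what .travis.yml runs; locally the same suite can presumably be exercised with:

```console
pip install -r requirements-dev.txt
nosetests
```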
/scripts/quickscan.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PY="/usr/bin/python"
4 | #THISFILE=$(readlink $0 || echo $0)
5 | CURDIR=$( cd "$(dirname "$(readlink $0 || echo $0)")" ; pwd -P )
6 | 
7 | # EXPORT="\"$CURDIR/../scripts/htmlreport.py\""
8 | # VULNS="\"$CURDIR/../scripts/vulns.py\""
9 | 
10 | HTCAP="\"$CURDIR/../htcap.py\""
11 | EXPORT="$HTCAP util report"
12 | VULNS="$HTCAP util lsvuln"
13 | yes=false
14 | requests='link,redirect,form,xhr,jsonp'
15 | cookies=""
16 | excluded=""
17 | 
18 | function yesno {
19 |     if [ $1 = false ]; then
20 |         read yesno
21 |     else
22 |         yesno="y"
23 |     fi
24 | 
25 |     echo $yesno
26 | }
27 | 
28 | if [ $# -lt 1 ];then
29 |     echo "usage "$(basename $0) "[options]" "<target url>"
30 |     echo "options:"
31 |     echo " -r   set request types (default: " $requests ")"
32 |     echo " -y   say yes to all questions"
33 |     echo " -c   set cookies"
34 |     echo " -x   set excluded urls"
35 |     exit 1
36 | fi
37 | 
38 | while getopts "r:yc:x:" opt; do
39 |     case "$opt" in
40 |         r) requests=$OPTARG
41 |         ;;
42 |         y) yes=true
43 |         ;;
44 |         c) cookies="-c '$OPTARG'"
45 |         ;;
46 |         x) excluded="-x '$OPTARG'"
47 |         ;;
48 |     esac
49 | done
50 | shift $((OPTIND-1))
51 | 
52 | HOST=$1
53 | 
54 | 
55 | OUTFILE=`echo $HOST | sed -E 's/^https?:\/\///' | sed 's/\./_/g' | sed 's/\/.*//g'`
56 | 
57 | if [ -e "$OUTFILE.db" ];then
58 |     echo -n "$OUTFILE.db already exists. Overwrite it? (y/N): " && $yes && echo "y"
59 |     if [ "$(yesno $yes)" = "y" ]; then
60 |         rm "$OUTFILE.db"
61 |     else
62 |         exit 1
63 |     fi
64 | fi
65 | 
66 | 
67 | echo $HTCAP crawl $cookies $excluded $HOST $OUTFILE.db | xargs $PY || exit 1
68 | echo -n "Run arachni? (y/N): " && $yes && echo "y"
69 | if [ "$(yesno $yes)" = "y" ]; then
70 |     echo $HTCAP scan -r $requests arachni $OUTFILE.db | xargs $PY || exit 1
71 | fi
72 | echo -n "Run sqlmap? (y/N): " && $yes && echo "y"
73 | if [ "$(yesno $yes)" = "y" ]; then
74 |     echo $HTCAP scan -r $requests sqlmap $OUTFILE.db | xargs $PY || exit 1
75 | fi
76 | echo
77 | 
78 | if [ "`echo $VULNS $OUTFILE.db | xargs $PY`" = "" ];then
79 |     echo "No vulnerabilities found"
80 | else
81 |     echo "Detected vulnerabilities:"
82 |     /bin/bash -c 'sqlite3 -version' > /dev/null 2>&1
83 |     if [ $? = 0 ]; then
84 |         echo "SELECT ' Type: ',type, ', found ',count(type) FROM vulnerability GROUP BY type ORDER BY count(type) DESC;" | sqlite3 -separator "" $OUTFILE.db
85 |     else
86 |         echo " Warning: unable to run sqlite3 command"
87 |     fi
88 |     echo
89 | fi
90 | 
91 | rm "$OUTFILE".html 2> /dev/null
92 | echo $EXPORT $OUTFILE.db $OUTFILE.html | xargs $PY
93 | 
94 | opener=""
95 | while [ "$opener" = "" ];do
96 |     echo -n "Open $OUTFILE.html with command (^C to abort): "
97 |     read opener
98 |     if [ "$opener" != "" ];then
99 |         $opener "$OUTFILE".html
100 |     fi
101 | done
102 | 
103 | exit 0
--------------------------------------------------------------------------------
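An illustrative run of the wrapper above against a hypothetical target; -y auto-answers every prompt, and the output names are derived from the host (here example_com.db and example_com.html):

```console
./scripts/quickscan.sh -y -r link,form,xhr http://example.com
```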
/core/crawl/lib/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | import posixpath
14 | import re
15 | from urlparse import urlsplit
16 | 
17 | from core.constants import *
18 | from core.crawl.lib.shared import Shared
19 | from core.lib.utils import group_qs_params
20 | 
21 | 
22 | def request_in_scope(request):
23 |     url = request.url
24 |     purl = urlsplit(url)
25 |     spurl = urlsplit(Shared.start_url)
26 |     scope = Shared.options['scope']
27 |     in_scope = False
28 | 
29 |     # check for scopes
30 |     if scope == CRAWLSCOPE_DOMAIN:
31 |         for pattern in Shared.allowed_domains:
32 |             if re.match(pattern, purl.hostname):
33 |                 in_scope = True
34 |                 break
35 | 
36 |     elif scope == CRAWLSCOPE_DIRECTORY:
37 |         if purl.hostname != spurl.hostname:
38 |             in_scope = False
39 |         else:
40 |             path = [p for p in posixpath.dirname(purl.path).split("/") if p]
41 |             spath = [p for p in posixpath.dirname(spurl.path).split("/") if p]
42 |             in_scope = path[:len(spath)] == spath
43 | 
44 |     elif scope == CRAWLSCOPE_URL:
45 |         in_scope = url == Shared.start_url
46 | 
47 |     # check for excluded urls
48 |     for pattern in Shared.excluded_urls:
49 |         if re.match(pattern, request.url):
50 |             in_scope = False
51 |             break
52 | 
53 |     return in_scope
54 | 
55 | 
56 | def adjust_requests(requests):
57 |     """
58 |     adjusts an array of requests according to the current status/settings:
59 |     1. sets the out_of_scope property
60 |     2. normalizes the url according to user settings
61 |     """
62 | 
63 |     for request in requests:
64 |         if request.type == REQTYPE_UNKNOWN or not request_in_scope(request):
65 |             request.out_of_scope = True
66 | 
67 |         if Shared.options['group_qs']:
68 |             request.url = group_qs_params(request.url)
69 | 
70 |     return requests
71 | 
72 | 
73 | def request_depth(request):
74 |     if request.parent is None:
75 |         return 1
76 | 
77 |     return 1 + request_depth(request.parent)
78 | 
79 | 
80 | def request_post_depth(request):
81 |     if request.method != "POST":
82 |         return 0
83 | 
84 |     if request.parent is None or request.parent.method != "POST":
85 |         return 1
86 | 
87 |     return 1 + request_post_depth(request.parent)
88 | 
89 | 
90 | def request_is_crawlable(request):
91 |     if request.out_of_scope:
92 |         return False
93 | 
94 |     types = [REQTYPE_LINK, REQTYPE_REDIRECT]
95 |     if Shared.options['mode'] == CRAWLMODE_AGGRESSIVE and Shared.options['crawl_forms']:
96 |         types.append(REQTYPE_FORM)
97 | 
98 |     return request.type in types and re.match("^https?://", request.url, re.I)
--------------------------------------------------------------------------------
/core/util/utilities/usgen.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - htcap.org
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | import sys
14 | import json
15 | import os
16 | 
17 | from core.lib.utils import *
18 | from core.util.base_util import BaseUtil
19 | 
20 | 
21 | class Usgen(BaseUtil):
22 | 
23 |     @staticmethod
24 |     def get_settings():
25 |         return dict(
26 |             descr = "Generate a sample user script",
27 |             optargs = '',
28 |             minargs = 1
29 |         )
30 | 
31 |     def usage(self):
32 |         return (
33 |             "%s\n"
34 |             "usage: %s <outfile>\n"
35 |             % (self.get_settings()['descr'], self.utilname)
36 |         )
37 | 
38 | 
39 |     def main(self, args, opts):
40 |         usfile = generate_filename(args[0], 'js', False, True)
41 |         try:
42 |             with open(usfile, 'w') as f:
43 |                 f.write(CONTENT)
44 |             print "User Script saved to %s" % usfile
45 |         except Exception as e:
46 |             print "Unable to write file %s" % usfile
47 |             sys.exit(1)
48 | 
49 | 
50 | CONTENT = """/*
51 | UI Methods:
52 | ui.print(message) - save a per-request user message into the request table
53 | ui.fread(path_to_file) - read from file
54 | ui.fwrite(path_to_file, content, mode) - write to file
55 | ui.render(path_to_file) - save a screenshot of the page current state
56 | ui.triggerEvent(element, event) - trigger an event
57 | */
58 | 
59 | {
60 |     onInit: function(ui){
61 |         // override native methods
62 |         window.prompt = function(){ return "AAA" };
63 |         // init local variables
64 |         ui.vars.cnt = 0;
65 |     },
66 | 
67 |     onStart: function(ui){},
68 | 
69 |     onTriggerEvent: function(ui, element, event){
70 |         // cancel trigger if element has class kill-all
71 |         if(element.matches(".kill-all")) return false;
72 |     },
73 | 
74 |     onEventTriggered: function(ui, element, event){},
75 | 
76 |     onFillInput: function(ui, element){
77 |         // here it's possible to force a value or prevent it to be filled
78 |         // WARNING: do NOT set dynamic values!
for instance something like 79 | // element.value = Math.random() 80 | // will lead to INFINITE CRAWLING if you crawl forms 81 | 82 | if(element.id == "car_vendor"){ 83 | element.value = "Ferrari"; 84 | return false; 85 | } 86 | }, 87 | 88 | onXhr: function(ui, request){ 89 | // cancel XHR request if url matches XXX 90 | if(request.url.match(/XXX/)) 91 | return false 92 | }, 93 | 94 | onAllXhrsCompleted: function(ui){}, 95 | 96 | onDomModified: function(ui, rootElements, allElements){ 97 | // save a screenshot on every DOM change 98 | ui.render(ui.id + "-screen-" + ui.vars.cnt + ".png"); 99 | ui.vars.cnt++; 100 | }, 101 | 102 | onEnd: function(ui){} 103 | } 104 | """ 105 | 106 | -------------------------------------------------------------------------------- /core/lib/thirdparty/pysocks/sockshandler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | SocksiPy + urllib2 handler 4 | 5 | version: 0.3 6 | author: e 7 | 8 | This module provides a Handler which you can use with urllib2 to allow it to tunnel your connection through a socks.sockssocket socket, with out monkey patching the original socket... 9 | """ 10 | import ssl 11 | 12 | try: 13 | import urllib2 14 | import httplib 15 | except ImportError: # Python 3 16 | import urllib.request as urllib2 17 | import http.client as httplib 18 | 19 | import socks # $ pip install PySocks 20 | 21 | def merge_dict(a, b): 22 | d = a.copy() 23 | d.update(b) 24 | return d 25 | 26 | class SocksiPyConnection(httplib.HTTPConnection): 27 | def __init__(self, proxytype, proxyaddr, proxyport=None, rdns=True, username=None, password=None, *args, **kwargs): 28 | self.proxyargs = (proxytype, proxyaddr, proxyport, rdns, username, password) 29 | httplib.HTTPConnection.__init__(self, *args, **kwargs) 30 | 31 | def connect(self): 32 | self.sock = socks.socksocket() 33 | self.sock.setproxy(*self.proxyargs) 34 | if type(self.timeout) in (int, float): 35 | self.sock.settimeout(self.timeout) 36 | self.sock.connect((self.host, self.port)) 37 | 38 | class SocksiPyConnectionS(httplib.HTTPSConnection): 39 | def __init__(self, proxytype, proxyaddr, proxyport=None, rdns=True, username=None, password=None, *args, **kwargs): 40 | self.proxyargs = (proxytype, proxyaddr, proxyport, rdns, username, password) 41 | httplib.HTTPSConnection.__init__(self, *args, **kwargs) 42 | 43 | def connect(self): 44 | sock = socks.socksocket() 45 | sock.setproxy(*self.proxyargs) 46 | if type(self.timeout) in (int, float): 47 | sock.settimeout(self.timeout) 48 | sock.connect((self.host, self.port)) 49 | self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) 50 | 51 | class SocksiPyHandler(urllib2.HTTPHandler, urllib2.HTTPSHandler): 52 | def __init__(self, *args, **kwargs): 53 | self.args = args 54 | self.kw = kwargs 55 | urllib2.HTTPHandler.__init__(self) 56 | 57 | def http_open(self, req): 58 | def build(host, port=None, timeout=0, **kwargs): 59 | kw = merge_dict(self.kw, kwargs) 60 | conn = SocksiPyConnection(*self.args, host=host, port=port, timeout=timeout, **kw) 61 | return conn 62 | return self.do_open(build, req) 63 | 64 | def https_open(self, req): 65 | def build(host, port=None, timeout=0, **kwargs): 66 | kw = merge_dict(self.kw, kwargs) 67 | conn = SocksiPyConnectionS(*self.args, host=host, port=port, timeout=timeout, **kw) 68 | return conn 69 | return self.do_open(build, req) 70 | 71 | if __name__ == "__main__": 72 | import sys 73 | try: 74 | port = int(sys.argv[1]) 75 | except (ValueError, IndexError): 76 
| port = 9050 77 | opener = urllib2.build_opener(SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "localhost", port)) 78 | print("HTTP: " + opener.open("http://httpbin.org/ip").read().decode()) 79 | print("HTTPS: " + opener.open("https://httpbin.org/ip").read().decode()) 80 | -------------------------------------------------------------------------------- /core/scan/scanners/sqlmap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 11 | """ 12 | 13 | from __future__ import unicode_literals 14 | import sys 15 | import time 16 | import re 17 | import json 18 | import base64 19 | import uuid 20 | import getopt 21 | 22 | import threading 23 | 24 | from urlparse import urlparse, urljoin, parse_qs, parse_qsl, urlsplit 25 | 26 | from core.lib.exception import * 27 | from core.lib.cookie import Cookie 28 | 29 | from core.scan.base_scanner import BaseScanner 30 | from core.lib.utils import * 31 | 32 | 33 | 34 | class Sqlmap(BaseScanner): 35 | 36 | def init(self, argv): 37 | self.skip_duplicates = True 38 | 39 | try: 40 | opts, args = getopt.getopt(argv, 'hs') 41 | except getopt.GetoptError as err: 42 | print str(err) 43 | self.exit(1) 44 | 45 | for o, v in opts: 46 | if o == '-h': 47 | self.usage() 48 | self.exit(0) 49 | elif o == '-s': 50 | self.skip_duplicates = False 51 | 52 | 53 | 54 | def usage(self): 55 | print ( "htcap sqlmap module\nusage: scan sqlmap [options]\n" 56 | "Options are:\n" 57 | " -h this help\n" 58 | " -s do not skip duplicated urls\n" 59 | ) 60 | 61 | def get_settings(self): 62 | return dict( 63 | request_types = "xhr,link,form,jsonp,redirect", 64 | num_threads = 5, 65 | process_timeout = 300, 66 | scanner_exe = "/usr/share/sqlmap/sqlmap.py" 67 | ) 68 | 69 | # return False to skip current request 70 | def get_cmd(self, request, tmp_dir): 71 | 72 | if self.skip_duplicates and self.is_request_duplicated(request): 73 | return False 74 | 75 | if request.method == "GET": 76 | purl = urlsplit(request.url) 77 | if not purl.query: 78 | return False 79 | 80 | #print request.url 81 | 82 | out_dir = tmp_dir + "/tmp" 83 | if not os.path.exists(out_dir): 84 | os.makedirs(out_dir, 0700) 85 | 86 | cookie_file = tmp_dir + "/cookies.json" 87 | with open(cookie_file,'w') as cf: 88 | for c in request.cookies: 89 | cf.write(c.get_as_netscape() + "\n") 90 | 91 | cmd = [ 92 | "--batch", 93 | "-u", request.url, 94 | "-v", "0", 95 | "--disable-coloring", 96 | "--text-only", 97 | "--purge-output", 98 | "-o", 99 | "--crawl=0", 100 | "--output-dir", out_dir 101 | ] 102 | 103 | if request.referer: 104 | cmd.extend(("--referer", request.referer)) 105 | 106 | if len(request.cookies) > 0: 107 | cmd.extend(("--load-cookies", cookie_file)) 108 | 109 | if request.method == "POST": 110 | cmd.extend(("--method","POST")) 111 | if request.data: 112 | cmd.extend(("--data",request.data)) 113 | 114 | 115 | return cmd 116 | 117 | def scanner_executed(self, request, out, err, tmp_dir, cmd): 118 | # print cmd_to_str(cmd) 119 | if not out:return 120 | 121 | descr = "C O M M A N D\n\n%s\n\nD E T A I L S\n\n" % cmd_to_str(cmd) 122 | report = re.findall(r'---([^]]*)---', out) 123 | if len(report) == 0: return 124 | for vuln in report: 
125 | descr += vuln + "\n"
126 | 
127 | self.save_vulnerability(request, "sqli", descr)
128 | 
129 | 
--------------------------------------------------------------------------------
/core/scan/scanner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from __future__ import unicode_literals
14 | import sys
15 | import time
16 | import re
17 | import json
18 | import base64
19 | import uuid
20 | import getopt
21 | import os
22 | import glob
23 | import importlib
24 | 
25 | from core.lib.exception import *
26 | from core.lib.cookie import Cookie
27 | from core.lib.utils import *
28 | from base_scanner import BaseScanner
29 | 
30 | 
31 | class Scanner:
32 | 
33 | def __init__(self, argv):
34 | scanner_files = glob.glob(os.path.join("%s%sscanners" % (getrealdir(__file__), os.sep) , "*.py"))
35 | self.scanners = [os.path.basename(m).split(".")[0] for m in scanner_files if not m.endswith("__.py")]
36 | 
37 | num_threads = None
38 | request_types = None
39 | process_timeout = None
40 | display_progress = True
41 | scanner_exe = None
42 | 
43 | try:
44 | opts, args = getopt.getopt(argv, 'hn:t:r:qe:')
45 | except getopt.GetoptError as err:
46 | print str(err)
47 | sys.exit(1)
48 | 
49 | 
50 | if len(args) < 2:
51 | self.usage()
52 | sys.exit(1)
53 | 
54 | 
55 | for o, v in opts:
56 | if o == '-h':
57 | self.usage()
58 | sys.exit(0)
59 | elif o == '-n':
60 | num_threads = int(v)
61 | elif o == '-t':
62 | process_timeout = int(v)
63 | elif o == '-e':
64 | scanner_exe = v
65 | elif o == '-q':
66 | display_progress = False
67 | elif o == '-r':
68 | request_types = v
69 | 
70 | self.scanner = args[0]
71 | self.db_file = args[1]
72 | 
73 | scanner_argv = args[2:]
74 | 
75 | if not self.scanner in self.scanners:
76 | print "Available scanners are: %s" % ", ".join(self.scanners)
77 | sys.exit(1)
78 | 
79 | if not os.path.exists(self.db_file):
80 | print "No such file %s" % self.db_file
81 | sys.exit(1)
82 | 
83 | 
84 | mod = importlib.import_module("core.scan.scanners.%s" % self.scanner)
85 | run = getattr(mod, self.scanner.title())
86 | run(self.db_file, num_threads, request_types, process_timeout, scanner_exe, display_progress, scanner_argv)
87 | 
88 | print "Scan finished"
89 | 
90 | 
91 | 
92 | def usage(self):
93 | print (
94 | "\n"
95 | "Usage: scan [options] <scanner> <db_file> [scanner_options]\n"
96 | "Options: \n"
97 | " -h this help\n"
98 | " -n THREADS number of parallel threads\n"
99 | " -r REQUEST_TYPES comma separated list of request types to pass to the scanner\n"
100 | " -t TIMEOUT process timeout in seconds\n"
101 | " -e PATH path to scanner executable\n"
102 | "\n"
103 | "Scanner Options: \n"
104 | " these are scanner-specific options (if available); try -h ..\n"
105 | "\n"
106 | "Available scanners are:\n"
107 | " - " + "\n - ".join(self.scanners) + "\n"
108 | "\n"
109 | "Available request types are:\n"
110 | " - xhr (ajax)\n"
111 | " - link (anchors href)\n"
112 | " - redirect (url from redirect)\n"
113 | " - form\n"
114 | " - jsonp\n"
115 | " - websocket\n"
116 | )
117 | 
118 | 
--------------------------------------------------------------------------------
/core/scan/scanners/wapiti.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from __future__ import unicode_literals
14 | import sys
15 | import os
16 | import time
17 | import re
18 | import json
19 | import base64
20 | import uuid
21 | 
22 | from core.lib.exception import *
23 | from core.lib.cookie import Cookie
24 | from core.lib.utils import *
25 | from core.scan.base_scanner import BaseScanner
26 | 
27 | class Wapiti(BaseScanner):
28 | 
29 | 
30 | def init(self, argv):
31 | return True
32 | 
33 | def get_settings(self):
34 | return dict(
35 | scanner_name = "wapiti",
36 | request_types = "xhr,link,form,jsonp,redirect",
37 | num_threads = 10,
38 | process_timeout = 180,
39 | scanner_exe = "python /usr/local/bin/wapiti"
40 | )
41 | 
42 | # return False to skip current request
43 | def get_cmd(self, request, tmp_dir):
44 | url = request.url
45 | # skip check of XSS via POST since they should be considered CSRF
46 | if request.method == "POST" and request.data:
47 | url += "?" + request.data
48 | 
49 | 
50 | out_file = tmp_dir + "/output.json"
51 | 
52 | cookie_file = tmp_dir + "/cookies.json"
53 | with open(cookie_file,'w') as cf:
54 | jsn = self.convert_cookies(request.cookies)
55 | cf.write(jsn)
56 | 
57 | 
58 | cmd = [
59 | url,
60 | "--timeout", "30",
61 | # Set the modules (and HTTP methods for each module) to use for attacks.
62 | # Prefix a module name with a dash to deactivate the related module.
63 | # To only browse the target (without sending any payloads), deactivate every module with -m "-all".
64 | # If you don't specify the HTTP methods, GET and POST will be used.
65 | # Example: -m "-all,xss:get,exec:post"
66 | "--module", "-all,xss:get",
67 | "--scope", "page",
68 | "--format", "json",
69 | "--output", out_file,
70 | "--verify-ssl", "0"
71 | ]
72 | 
73 | # ! no option to set referer ?
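# For orientation, a hedged example of the full command line this method builds
# (BaseScanner prepends scanner_exe from get_settings; the target URL and tmp
# paths below are illustrative, not captured output):
#
#   python /usr/local/bin/wapiti 'http://target.local/page?id=1' --timeout 30 \
#       --module '-all,xss:get' --scope page --format json \
#       --output <tmp_dir>/output.json --verify-ssl 0 [--cookie <tmp_dir>/cookies.json]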
74 | 
75 | if len(request.cookies) > 0:
76 | cmd.extend(("--cookie", cookie_file))
77 | 
78 | # print cmd_to_str(cmd)
79 | # self.exit(1)
80 | # return False
81 | return cmd
82 | 
83 | def scanner_executed(self, request, out, err, tmp_dir, cmd):
84 | out_file = tmp_dir + "/output.json"
85 | 
86 | if not os.path.exists(out_file):
87 | return
88 | 
89 | with open(out_file,'r') as fil:
90 | jsn = fil.read()
91 | 
92 | report = []
93 | try:
94 | report = json.loads(jsn)['vulnerabilities']['Cross Site Scripting']
95 | except Exception as e:
96 | print e
97 | 
98 | for vuln in report:
99 | self.save_vulnerability(request, "XSS", json.dumps(vuln))
100 | 
101 | 
102 | 
103 | # convert cookies to wapiti format
104 | def convert_cookies(self, cookies):
105 | wcookies = {}
106 | for cookie in cookies:
107 | domain = cookie.domain
108 | if domain:
109 | if not domain.startswith("."): domain = ".%s" % domain
110 | else:
111 | domain = cookie.setter.hostname
112 | 
113 | if not domain in wcookies.keys():
114 | wcookies[domain] = {}
115 | 
116 | if not cookie.path in wcookies[domain].keys():
117 | wcookies[domain][cookie.path] = {}
118 | 
119 | wcookies[domain][cookie.path][cookie.name] = dict(
120 | version = 0,
121 | expires = cookie.expires,
122 | secure = cookie.secure,
123 | value = cookie.value,
124 | port = None
125 | )
126 | 
127 | return json.dumps(wcookies)
--------------------------------------------------------------------------------
/tests/crawl_tests/urlfinder_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | from core.crawl.lib.urlfinder import UrlFinder
4 | 
5 | 
6 | class UrlFinderTest(unittest.TestCase):
7 | def test_empty_html(self):
8 | html_sample = ""
9 | finder = UrlFinder(html_sample)
10 | 
11 | self.assertEqual(finder.get_urls(), [])
12 | 
13 | def test_basic_html(self):
14 | html_sample = """
15 | <html>
16 | <head>
17 | <meta charset="utf-8">
18 | </head>
19 | <body></body>
20 | </html>
21 | """
22 | finder = UrlFinder(html_sample)
23 | 
24 | self.assertEqual(finder.get_urls(), [])
25 | 
26 | def test_with_relative_link(self):
27 | html_sample = '<a href="test.html">test</a>'
28 | finder = UrlFinder(html_sample)
29 | 
30 | self.assertEqual(finder.get_urls(), ["test.html"])
31 | 
32 | def test_with_relative_link_and_absolute_base_href(self):
33 | html_sample = """
34 | <html>
35 | <head>
36 | <base href="http://somewhere.else/someWeirdPath/">
37 | </head>
38 | <body>
39 | <a href="test.html">test</a>
40 | </body></html>
41 | """
42 | finder = UrlFinder(html_sample)
43 | self.assertEqual(finder.get_urls(), ["http://somewhere.else/someWeirdPath/test.html"])
44 | 
45 | html_sample = """
46 | <html>
47 | <head>
48 | <base href="http://somewhere.else/someWeirdPath/somePage.html">
49 | </head>
50 | <body>
51 | <a href="test.html">test</a>
52 | </body></html>
53 | """
54 | finder = UrlFinder(html_sample)
55 | self.assertEqual(finder.get_urls(), ["http://somewhere.else/someWeirdPath/test.html"])
56 | 
57 | def test_with_relative_link_and_relative_base_href(self):
58 | html_sample = """
59 | <html>
60 | <head>
61 | <base href="/someWeirdPath/">
62 | </head>
63 | <body>
64 | <a href="test.html">test</a>
65 | </body></html>
66 | """
67 | finder = UrlFinder(html_sample)
68 | self.assertEqual(finder.get_urls(), ["/someWeirdPath/test.html"])
69 | 
70 | html_sample = """
71 | <html>
72 | <head>
73 | <base href="someWeirdPath/">
74 | </head>
75 | <body>
76 | <a href="test.html">test</a>
77 | </body></html>
78 | """
79 | finder = UrlFinder(html_sample)
80 | self.assertEqual(finder.get_urls(), ["someWeirdPath/test.html"])
81 | 
82 | def test_with_anchor_link(self):
83 | html_sample = '<a href="#someAnchor">test</a>'
84 | finder = UrlFinder(html_sample)
85 | 
86 | self.assertEqual(finder.get_urls(), [])
87 | 
88 | def test_with_no_http_link(self):
89 | html_sample = '<a href="ftp://test.lan">test</a>'
90 | 
91 | finder = UrlFinder(html_sample)
92 | 
93 | self.assertEqual(finder.get_urls(), [])
94 | 
95 | def test_with_http_absolute_link(self):
96 | html_sample = '<a href="http://test.lan">test</a>'
97 | 
98 | finder = UrlFinder(html_sample)
99 | 
100 | 
self.assertEqual(finder.get_urls(), ["http://test.lan"])
101 | 
102 | def test_with_https_absolute_link(self):
103 | html_sample = '<a href="https://test.lan">test</a>'
104 | 
105 | finder = UrlFinder(html_sample)
106 | 
107 | self.assertEqual(finder.get_urls(), ["https://test.lan"])
108 | 
--------------------------------------------------------------------------------
/core/util/utilities/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - htcap.org
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | import sys
14 | import sqlite3
15 | import json
16 | import getopt
17 | import os
18 | import getpass
19 | 
20 | from core.lib.utils import *
21 | from core.lib.shell import CommandExecutor
22 | from core.util.base_util import BaseUtil
23 | from core.lib.cookie import Cookie
24 | 
25 | reload(sys)
26 | sys.setdefaultencoding('utf8')
27 | 
28 | class Login(BaseUtil):
29 | 
30 | @staticmethod
31 | def get_settings():
32 | return dict(
33 | descr = "Login to a webapp to get session cookies and logout urls",
34 | optargs = 'p:HJhAcl',
35 | minargs = 2
36 | )
37 | 
38 | def usage(self):
39 | return (
40 | "%s\n"
41 | "usage: %s [options] <url> <username> [<button text>]\n"
42 | "Options:\n"
43 | " -h This help\n"
44 | " -p PASSWD Set login password\n"
45 | " -c Do not output cookies\n"
46 | " -l Do not output logout urls\n"
47 | " -H Format output as htcap arguments\n"
48 | " -J Format output as json (for cookies only)\n"
49 | " -A Format output as general command line arguments\n"
50 | % (self.get_settings()['descr'], self.utilname)
51 | )
52 | 
53 | 
54 | def main(self, args, opts):
55 | passw = None
56 | format = None
57 | out_cookies = True
58 | out_logouts = True
59 | for o,v in opts:
60 | if o == "-h":
61 | print self.usage()
62 | sys.exit(0)
63 | elif o == "-p":
64 | passw = v
65 | elif o == "-c":
66 | out_cookies = False
67 | elif o == "-l":
68 | out_logouts = False
69 | elif o in ("-H", "-J", "-A"):
70 | format = o
71 | 
72 | if not passw:
73 | print "The password is hidden here BUT it will be passed to phantomjs via commandline ..."
74 | try:
75 | passw = getpass.getpass()
76 | except KeyboardInterrupt:
77 | print "\nAbort..."
78 | sys.exit(0)
79 | 
80 | jspath = "%s%s%s%s" % (getrealdir(__file__), "login", os.sep, "login.js")
81 | cmd = get_phantomjs_cmd() + [jspath, args[0], args[1], passw]
82 | if len(args) > 2: cmd.append(args[2])
83 | #print cmd_to_str(cmd)
84 | exe = CommandExecutor(cmd, True)
85 | out, err = exe.execute(20)
86 | if err:
87 | print "Unable to login"
88 | sys.exit(1)
89 | 
90 | try:
91 | ret = json.loads(out)
92 | except ValueError as e:
93 | print e
94 | sys.exit(1)
95 | allcookies, logouts = ret
96 | cookies = []
97 | if out_cookies:
98 | for c in reversed(allcookies):
99 | cookie = Cookie(c)
100 | if not cookie in cookies: cookies.append(cookie)
101 | if not out_logouts:
102 | logouts = []
103 | 
104 | if not format:
105 | print "Cookies:"
106 | for c in cookies:
107 | print " %s=%s" % (c.name, c.value)
108 | print "Logout urls:"
109 | for u in logouts:
110 | print " %s" % u
111 | elif format == "-A":
112 | for c in cookies:
113 | print cmd_to_str([c.name, c.value])
114 | for u in logouts:
115 | print cmd_to_str([u])
116 | elif format == "-H":
117 | args = []
118 | if len(cookies) > 0:
119 | args = ["-c", ";".join(["%s=%s" % (c.name, c.value) for c in cookies])]
120 | if len(logouts) > 0:
121 | args.extend(["-x", ",".join(logouts)])
122 | if len(args) > 0:
123 | print cmd_to_str(args)
124 | elif format == "-J":
125 | cd = []
126 | for c in cookies:
127 | cd.append(c.get_dict())
128 | if out_cookies:
129 | print json.dumps(cd)
130 | 
131 | 
132 | 
--------------------------------------------------------------------------------
/core/crawl/probe/src/constants.js:
--------------------------------------------------------------------------------
1 | (function() {
2 | 'use strict';
3 | 
4 | exports.constants = {
5 | XHRTimeout: 5000,
6 | 
7 | eventLoopConfig: {
8 | messageEvent: {
9 | from: 'javascript-probe',
10 | name: 'event-loop-ready',
11 | },
12 | 
13 | /**
14 | * number of event loop cycles between every new action processed in the eventLoop
15 | * lower is better for speed
16 | * higher is better for discovery
17 | */
18 | bufferCycleSize: 150,
19 | 
20 | /**
21 | * in milliseconds,
22 | * after triggering an event, time to wait before requesting another eventLoop cycle
23 | * lower is better for speed
24 | */
25 | afterEventTriggeredTimeout: 10,
26 | 
27 | /**
28 | * in milliseconds,
29 | * after an XHR completes, time to wait before requesting another eventLoop cycle
30 | */
31 | afterDoneXHRTimeout: 10,
32 | 
33 | /**
34 | * in milliseconds,
35 | * time to wait before closing the event loop manager (when everything seems to be done)
36 | */
37 | beforeClosingEventLoopManagerTimeout: 500,
38 | },
39 | 
40 | // see: https://developer.mozilla.org/en-US/docs/Web/Events
41 | mappableEvents: [
42 | 'abort', 'blur', 'canplay', 'canplaythrough', 'change', 'click', 'close', 'contextmenu', 'copy',
43 | 'cut', 'dblclick', 'drag', 'dragend', 'dragenter', 'dragleave', 'dragover', 'dragstart', 'drop',
44 | 'durationchange', 'emptied', 'ended', 'error', 'focus', 'fullscreenchange', 'fullscreenerror',
45 | 'input', 'invalid', 'keydown', 'keypress', 'keyup', 'load', 'loadeddata', 'loadedmetadata',
46 | 'loadstart', 'mousedown', 'mouseenter', 'mouseleave', 'mousemove', 'mouseout', 'mouseover',
47 | 'mouseup', 'paste', 'pause', 'play', 'playing', 'progress', 'ratechange', 'reset', 'resize',
48 | 'scroll', 'seeked', 'seeking', 'select', 'show', 'stalled', 'submit', 'suspend', 'timeupdate',
49 | 'volumechange', 'waiting', 'wheel',
50 | ],
51 | 
52 | /**
53 | * always trigger these events on the given element
54 | */
55 | 
triggerableEvents: {
56 | 'button': ['click', 'dblclick', 'keyup', 'keydown', 'mouseup', 'mousedown'],
57 | 'select': ['change', 'click', 'keyup', 'keydown', 'mouseup', 'mousedown'],
58 | 'input': ['change', 'click', 'blur', 'focus', 'keyup', 'keydown', 'mouseup', 'mousedown'],
59 | 'a': ['click', 'dblclick', 'keyup', 'keydown', 'mouseup', 'mousedown'],
60 | 'textarea': ['change', 'click', 'blur', 'focus', 'keyup', 'keydown', 'mouseup', 'mousedown'],
61 | 'span': ['click', 'mouseup', 'mousedown'],
62 | 'td': ['click', 'mouseup', 'mousedown'],
63 | 'tr': ['click', 'mouseup', 'mousedown'],
64 | 'div': ['click', 'mouseup', 'mousedown'],
65 | },
66 | 
67 | // map input names to string generators. see generateRandomValues to see all available generators
68 | inputNameMatchValue: [ // regexps NEED to be string to get passed to the page
69 | {name: 'mail', value: 'email'},
70 | {name: '((number)|(phone))|(^tel)', value: 'number'},
71 | {name: '(date)|(birth)', value: 'humandate'},
72 | {name: '((month)|(day))|(^mon$)', value: 'month'},
73 | {name: 'year', value: 'year'},
74 | {name: 'url', value: 'url'},
75 | {name: 'firstname', value: 'firstname'},
76 | {name: '(surname)|(lastname)', value: 'surname'},
77 | ],
78 | 
79 | /**
80 | * in pixels,
81 | * viewport size of the browser
82 | */
83 | viewport: {
84 | width: 1920,
85 | height: 1080,
86 | },
87 | };
88 | 
89 | })();
90 | 
--------------------------------------------------------------------------------
/core/util/utilities/htmlreport/report.html:
--------------------------------------------------------------------------------
[report.html is an HTML template whose markup was stripped during text extraction; only its visible strings survive. Recoverable structure: a "Htcap Report" page header; a summary table with the fields Target, Crawl date, Pages crawled, Crawl duration, Out of scope (with an "open" link), Non HTML (with an "open" link) and Command; a "Show:" filter bar with "Hide Urls:" and "Hide Results:" inputs; and the toolbar actions open notes, open marked, open trash, collapse all, expand visibles and save session. The template is styled by style.css in the same directory.]
--------------------------------------------------------------------------------
/core/crawl/probe/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "eslint:recommended",
3 | "env": {
4 | "browser": true,
5 | "node": true,
6 | "es6": true
7 | },
8 | "globals": {
9 | "describe": true,
10 | "afterEach": true,
11 | "beforeEach": true,
12 | "it": true,
13 | "expect": true,
14 | "by": true,
15 | "spyOn": true,
16 | "WebKitMutationObserver": true,
17 | "chrome": true,
18 | "PointerEvent": true
19 | },
20 | "rules": {
21 | "semi": "error",
22 | "no-bitwise": "error",
23 | "camelcase": "error",
24 | "curly": "error",
25 | "eqeqeq": "error",
26 | "object-curly-spacing": "warn",
27 | "object-curly-newline": [
28 | "error",
29 | {
30 | "consistent": true
31 | }
32 | ],
33 | "comma-spacing": "warn",
34 | "newline-per-chained-call": [
35 | "error",
36 | {
37 | "ignoreChainWithDepth": 2
38 | }
39 | ],
40 | "wrap-iife": [
41 | "error",
42 | "any"
43 | ],
44 | "indent": [
45 | "error",
46 | 4,
47 | {
48 | "SwitchCase": 1,
49 | "MemberExpression": "off",
50 | "FunctionDeclaration": {
51 | "parameters": "first"
52 | },
53 | "FunctionExpression": {
54 | "parameters": "first"
55 | }
56 | }
57 | ],
58 | "no-use-before-define": [
59 | "error",
60 | {
61 | "functions": false,
62 | "classes": true
63 | }
64 | ],
65 | "new-cap": "error",
66 | "no-caller": "error",
67 | "quotes": [
68 | "error",
69 | "single"
70 | ],
71 | "no-undef": "error",
72 | "no-unused-vars": "error",
73 | "strict": [
74 | "error",
75 | "function"
76 | ],
77 | "no-multi-str": "error",
78 | "operator-linebreak": [
79 | "error",
80 | "after"
81 | ],
82 | "max-len": [
83 | "error",
84 | 160
85 | ],
86 | "no-with": "error",
87 | "brace-style": "error",
88 | "no-mixed-spaces-and-tabs": "error",
89 | "key-spacing": [
90 | "error",
91 | {
92 | "beforeColon": false,
93 | "afterColon": true
94 | }
95 | ],
96 | "space-unary-ops": [
97 | "error",
98 | {
99 | "words": false,
100 | "nonwords": false
101 | }
102 | ],
103 | "space-before-function-paren": [
104 | "error",
105 | "never"
106 | ],
107 | "no-spaced-func": "error",
108 | "array-bracket-spacing": [
109 | "error",
110 | "never"
111 | ],
112 | "keyword-spacing": [
113 | "error",
114 | {
115 | "overrides": {
116 | "else": {
117 | "before": true,
118 | "after": true
119 | },
120 | "while": {
121 | "before": true,
122 | "after": true
123 | },
124 | "catch": {
125 | "before": true,
126 | "after": true
127 | },
128 | "if": {
129 | "after": true
130 | },
131 | "for": {
132 | "after": true
133 | },
134 | "do": {
135 | "after": true
136 | },
137 | "switch": {
138 | "after": true
139 | },
140 | "return": {
141 | "after": true
142 | },
143 | "try": {
144 | "after": true
145 | }
146 | }
147 | }
148 | ],
149 | "space-in-parens": [
150 | "error",
151 | "never"
152 | ],
153 | "comma-dangle": [
154 | "error",
155 | "always-multiline"
156 | ],
157 | "no-trailing-spaces": "error",
158 | "comma-style": [
159 | "error",
160 | "last"
161 | ],
162 | "eol-last": "error",
163 | "space-infix-ops": "error",
164 | "space-before-blocks": [
165 | "error",
166 | "always"
167 | ],
168 | "linebreak-style": [
169 | "error",
170 | "unix"
171 | ]
172 | }
173 | }
174 | 
--------------------------------------------------------------------------------
/core/crawl/probe/chrome_extension/background.js:
--------------------------------------------------------------------------------
1 | /*eslint no-console: off */
2 | (function() {
3 | 'use strict';
4 | 
5 | // keep track of all the opened tabs
6 | let tabs = {};
7 | 
8 | // store the probe starting tab (the first tab navigated with success)
9 | let startingTabId = undefined;
10 | 
11 | // Get all existing tabs
12 | chrome.tabs.query({}, function(results) {
13 | results.forEach(function(tab) {
14 | tabs[tab.id] = tab;
15 | });
16 | });
17 | 
18 | function onCreatedListener(tab) {
19 | tabs[tab.id] = tab;
20 | tabs[tab.id].haveBeenNavigated = false;
21 | }
22 | 
23 | // Create tab event listeners
24 | function onUpdatedListener(tabId, changeInfo, tab) {
25 | if (tab.url.startsWith('http')) {
26 | if (tab.url.startsWith('http') && changeInfo.status === 'complete') {
27 | startingTabId = tabId;
28 | tabs[startingTabId].haveBeenNavigated = true;
29 | }
30 | }
31 | }
32 | 
33 | function onRemovedListener(tabId) {
34 | delete tabs[tabId];
35 | }
36 | 
37 | /**
38 | * if the request url differs from the current tab url, block it
39 | * @param details
40 | * @return {{redirectUrl: string}}
41 | */
42 | function onBeforeRequestListener(details) {
43 | let result, currentTab = tabs[details.tabId];
44 | 
45 | // if the current tab exists (sometimes the request is issued before the tab exists)
46 | if (currentTab) {
47 | 
48 | // DEBUG:
49 | console.group();
50 | console.log('currentTab', currentTab);
51 | console.log('details', details);
52 | console.log('startingTabId', startingTabId);
53 | console.log(details.url.startsWith('http') && details.type === 'sub_frame');
54 | console.groupEnd();
55 | 
56 | // create the message link with the probe
57 | chrome.tabs.executeScript(startingTabId || currentTab.id, {file: 'content.js', runAt: 'document_start'});
58 | 
59 | // if the content is loaded from a sub-frame
60 | if (details.url.startsWith('http') && details.type === 'sub_frame') {
61 | 
62 | _notifyProbe(details.url, startingTabId || currentTab.id);
63 | 
64 | // redirect the navigation to nowhere
65 | result = {redirectUrl: 'javascript:void(0)'};
66 | 
67 | } else if (startingTabId) {
68 | 
69 | if (currentTab.id !== startingTabId) { // if the current tab is a new tab
70 | _notifyProbe(details.url, startingTabId);
71 | 
72 | // redirect the navigation to nowhere
73 | result = {redirectUrl: 'javascript:void(0)'};
74 | 
75 | // close the tab
76 | chrome.tabs.remove(currentTab.id);
77 | 
78 | } else if (tabs[startingTabId].haveBeenNavigated) { // if the starting tab has already been navigated
79 | 
80 | _notifyProbe(details.url, startingTabId);
81 | 
82 | // redirect the navigation to nowhere
83 | result = {redirectUrl: 'javascript:void(0)'};
84 | }
85 | }
86 | }
87 | return result;
88 | }
89 | 
90 | // Subscribe to tab events to track opened tabs
91 | chrome.tabs.onCreated.addListener(onCreatedListener);
92 | chrome.tabs.onUpdated.addListener(onUpdatedListener);
93 | chrome.tabs.onRemoved.addListener(onRemovedListener);
94 | 
95 | chrome.webRequest.onBeforeRequest.addListener(onBeforeRequestListener, {
96 | urls: ['<all_urls>'],
97 | types: ['main_frame', 'sub_frame'], // only watching for "frame" type request
98 | }, ['blocking']);
99 | 
100 | function _notifyProbe(url, tabId) {
101 | 
102 | // DEBUG:
103 | console.warn(`Navigation to ${url} blocked.`);
104 | 
105 | // sending message to the probe
106 | chrome.tabs.sendMessage(tabId, {url: url});
107 | }
108 | 
109 | })();
110 | 
--------------------------------------------------------------------------------
/core/lib/request_pattern.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | import json
14 | import xml.etree.ElementTree as ET
15 | from urlparse import parse_qs, urlsplit
16 | 
17 | 
18 | class RequestPattern:
19 | def __init__(self, request):
20 | self.request = request
21 | self.pattern = None
22 | 
23 | self.set_pattern()
24 | 
25 | def set_pattern(self):
26 | """
27 | sets request pattern for comparison
28 | """
29 | 
30 | # pattern[0] = url_pattern, pattern[1] = data_pattern
31 | self.pattern = [self.get_url_pattern(self.request.url), None]
32 | 
33 | if self.request.method == "GET" or not self.request.data:
34 | return
35 | 
36 | # try xml
37 | try:
38 | root = ET.fromstring(self.request.data)
39 | self.pattern[1] = self.get_xml_pattern(root)
40 | except Exception as e:
41 | # try json
42 | try:
43 | self.pattern[1] = self.get_json_pattern(self.request.data)
44 | except Exception as e:
45 | # try url-encoded
46 | try:
47 | self.pattern[1] = self.get_urlencoded_pattern(self.request.data, False)
48 | except Exception as e:
49 | # print "! UNKNOWN POST DATA FORMAT"
50 | pass
51 | 
52 | def get_url_pattern(self, url):
53 | """
54 | returns url pattern for comparison (query and data parameters are sorted and without values)
55 | """
56 | purl = urlsplit(url)
57 | patt = [purl.scheme, purl.netloc, purl.path, self.get_urlencoded_pattern(purl.query)]
58 | 
59 | return patt
60 | 
61 | def get_xml_pattern(self, node):
62 | """
63 | returns the xml tree as an array without values, example:
64 | 
65 | <root>
66 | <node foo="1" bar="2"/>
67 | <elements index="1">
68 | <z>1</z>
69 | <z>1</z>
70 | <a type="int">123</a>
71 | </elements>
72 | </root>
73 | 
74 | ['root', [ <--- child nodes sorted ('elements' comes before 'node')
75 | ['elements', 'index', [
76 | ['a', 'type'], ['z'], ['z']
77 | ]],
78 | ['node', 'bar', 'foo'] <--- properties sorted ('bar' comes before 'foo')
79 | ]]
80 | """
81 | 
82 | # describe a tag as its name plus the name of its properties (sorted)
83 | patt = [node.tag] + [x for x in sorted(node.attrib.keys())]
84 | 
85 | # collect child nodes in the form of "node array" (tagname+props)
86 | ch = []
87 | for child in node:
88 | ch.append(self.get_xml_pattern(child))
89 | 
90 | if ch:
91 | # sort using the tagname as the key
92 | ch.sort(key=lambda x: x[0])
93 | patt.append(ch)
94 | 
95 | return patt
96 | 
97 | def get_json_pattern(self, data):
98 | """
99 | returns an object with values set to zero (sorting of keys is not needed), example:
100 | {"user": "foo", "ids": [1, 2]} -> {"user": 0, "ids": [0, 0]}
101 | """
102 | patt = json.loads(data)
103 | self.nullify_object_values(patt)
104 | 
105 | return patt
106 | 
107 | def nullify_object_values(self, obj):
108 | """
109 | sets to 0 all object values
110 | """
111 | keys = obj.keys() if isinstance(obj, dict) else range(0, len(obj))
112 | 
113 | for k in keys:
114 | if not hasattr(obj[k], '__iter__'):
115 | obj[k] = 0
116 | else:
117 | self.nullify_object_values(obj[k])
118 | 
119 | def get_urlencoded_pattern(self, data, ignoreErrors=True):
120 | """
121 | returns query parameters sorted and without values
122 | """
123 | # parse_qs(qs[, keep_blank_values[, strict_parsing]])
124 | query = parse_qs(data, True, not ignoreErrors)
125 | return sorted(query.keys())
126 | 
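For orientation, a minimal sketch of the pattern this class produces (FakeRequest is a hypothetical stand-in for core.lib.request.Request, which is what RequestPattern normally receives; example.local is an illustrative host):

from core.lib.request_pattern import RequestPattern

class FakeRequest(object):
    def __init__(self, url, method="GET", data=None):
        self.url = url
        self.method = method
        self.data = data

# query string values are dropped and parameter names sorted, so these two
# requests collapse to the same pattern and are treated as duplicates
p1 = RequestPattern(FakeRequest("http://example.local/a?x=1&y=2")).pattern
p2 = RequestPattern(FakeRequest("http://example.local/a?y=9&x=8")).pattern
assert p1 == p2  # both: [['http', 'example.local', '/a', ['x', 'y']], None]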
-------------------------------------------------------------------------------- /core/crawl/probe/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @todo (blocked): 3 | * - make possible to send custom headers (set a referer) on page.goto() see: {@link https://github.com/GoogleChrome/puppeteer/issues/1062} 4 | * and {@link https://github.com/GoogleChrome/puppeteer/issues/686} 5 | * 6 | * @todo (nice to have): 7 | * - store headers for every request (mainly cookies and referrer) to enable a better "replay" 8 | * - also analyse the error pages (40x and 50x) 9 | */ 10 | 11 | (function() { 12 | 'use strict'; 13 | 14 | const process = require('process'); 15 | 16 | const logger = require('./logger').debug; 17 | const output = require('./logger').output; 18 | const puppeteer = require('puppeteer'); 19 | 20 | const constants = require('./src/constants').constants; 21 | const utils = require('./src/utils'); 22 | 23 | const pageHandler = require('./src/page-handler'); 24 | 25 | let options = utils.getOptionsFromArgs(), 26 | result = [], 27 | browser, 28 | handler; 29 | 30 | let startTime = Date.now(); 31 | 32 | // handling SIGTERM signal 33 | process.on('SIGTERM', () => { 34 | if (options.verbosity >= 1) { 35 | logger.info('SIGTERM signal received'); 36 | } 37 | result.push({'status': 'error', 'code': 'interruptReceived'}); 38 | _requestJobEnd(); 39 | }); 40 | 41 | function _requestJobEnd(exitCode) { 42 | 43 | if (options.verbosity >= 1) { 44 | logger.info('closing Node process'); 45 | logger.info('debug', `got results in ${(Date.now() - startTime) / 1000} sec : ${JSON.stringify(result)}`); 46 | } 47 | output.log('info', `${JSON.stringify(result)}`); 48 | 49 | if (!options.debug) { // keep the browser open for debug 50 | if (browser) { 51 | browser.close() 52 | .then(() => { 53 | process.exit(exitCode); 54 | }); 55 | } else { 56 | process.exit(exitCode); 57 | } 58 | } 59 | } 60 | 61 | function run([newBrowser, newPage]) { 62 | 63 | browser = newBrowser; 64 | 65 | handler = new pageHandler.Handler(newPage, constants, options); 66 | 67 | handler.on('finished', (exitCode, status) => { 68 | result.push(status); 69 | _requestJobEnd(exitCode); 70 | }); 71 | 72 | handler.on('probeResult', (newResult) => { 73 | result.push(newResult); 74 | }); 75 | 76 | handler.initialize() 77 | .then((page) => { 78 | if (options.verbosity >= 1) { 79 | logger.info(`starting navigation to ${options.startUrl.href}`); 80 | } 81 | page.goto(options.startUrl.href, {waitUntil: 'networkidle2'}) 82 | .then(response => { 83 | 84 | if (response.ok()) { 85 | // checking if it's some HTML document 86 | if (response.headers()['content-type'] 87 | .toLowerCase() 88 | .includes('text/html')) { 89 | 90 | handler.getCookies() 91 | .then(cookies => { 92 | result.push(['cookies', cookies]); 93 | }); 94 | 95 | if (options.verbosity >= 1) { 96 | logger.info('starting the probe'); 97 | } 98 | // start analysis on the page 99 | handler.startProbe(); 100 | } else { 101 | result.push({'status': 'error', 'code': 'contentType', 'message': `content type is ${response.headers()['content-type']}`}); 102 | _requestJobEnd(); 103 | } 104 | } else { 105 | result.push({'status': 'error', 'code': 'responseCode', 'message': `${response.status()}`}); 106 | _requestJobEnd(1); 107 | } 108 | }, 109 | (error) => { 110 | if (options.verbosity >= 1) { 111 | logger.error(`Error during goto: ${error}`); 112 | } 113 | result.push({'status': 'error', 'code': 'load', 'message': `${error}`}); 114 | _requestJobEnd(1); 
115 | });
116 | }, (error) => {
117 | if (options.verbosity >= 1) {
118 | logger.error(`Error during initialisation: ${error}`);
119 | }
120 | result.push({'status': 'error', 'code': 'probeError', 'message': `${error}`});
121 | _requestJobEnd(1);
122 | });
123 | }
124 | 
125 | pageHandler.getBrowserAndPage(puppeteer, options.proxyAddress, options.debug)
126 | .then(run);
127 | 
128 | })();
129 | 
--------------------------------------------------------------------------------
/core/scan/scanners/arachni.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | HTCAP - beta 1
5 | Author: filippo.cavallarin@wearesegment.com
6 | 
7 | This program is free software; you can redistribute it and/or modify it under
8 | the terms of the GNU General Public License as published by the Free Software
9 | Foundation; either version 2 of the License, or (at your option) any later
10 | version.
11 | """
12 | 
13 | from __future__ import unicode_literals
14 | import sys
15 | import os
16 | import time
17 | import re
18 | import json
19 | import base64
20 | import uuid
21 | import urllib
22 | import getopt
23 | import datetime
24 | 
25 | from core.lib.exception import *
26 | from core.lib.cookie import Cookie
27 | 
28 | from core.scan.base_scanner import BaseScanner
29 | from core.lib.shell import CommandExecutor
30 | from core.lib.utils import *
31 | 
32 | class Arachni(BaseScanner):
33 | 
34 | def init(self, argv):
35 | # scanner_exe is converted to array to handle something like "python /usr/bin/scanner"
36 | self.reporter = "%s%sarachni_reporter" % (os.path.dirname(self.settings['scanner_exe'][-1]), os.sep)
37 | if not os.path.exists(self.reporter):
38 | print "Error finding arachni_reporter: %s" % self.reporter
39 | self.exit(1)
40 | 
41 | self.skip_duplicates = True
42 | self.execute_command = True
43 | self.audit_both_methods = False
44 | 
45 | try:
46 | opts, args = getopt.getopt(argv, 'hspb')
47 | except getopt.GetoptError as err:
48 | print str(err)
49 | self.exit(1)
50 | 
51 | for o, v in opts:
52 | if o == '-h':
53 | self.usage()
54 | self.exit(0)
55 | elif o == '-s':
56 | self.skip_duplicates = False
57 | elif o == '-p':
58 | self.execute_command = False
59 | elif o == '-b':
60 | self.audit_both_methods = True
61 | 
62 | 
63 | 
64 | def usage(self):
65 | print ( "htcap arachni module\nusage: scan arachni [options]\n"
66 | "Options are:\n"
67 | " -h this help\n"
68 | " -s do not skip duplicated urls\n"
69 | " -p print first command and exit\n"
70 | " -b set audit-with-both-methods arachni option\n"
71 | )
72 | 
73 | def get_settings(self):
74 | return dict(
75 | request_types = "xhr,link,form,jsonp,redirect",
76 | num_threads = 5,
77 | process_timeout = 180,
78 | scanner_exe = "/usr/share/arachni/bin/arachni"
79 | #scanner_exe = "/usr/bin/arachni"
80 | )
81 | 
82 | # return False to skip current request
83 | def get_cmd(self, request, tmp_dir):
84 | out_file = tmp_dir + "/report"
85 | 
86 | if self.skip_duplicates and self.is_request_duplicated(request):
87 | return False
88 | 
89 | timeout = str(datetime.timedelta(seconds=(self.settings['process_timeout']-5)))
90 | 
91 | cmd = [
92 | #"--checks", "sql_injection*",
93 | "--checks", "code_injection*,file_inclusion*,path_traversal*,rfi*,xss*,xxe*", # "xss*",
94 | "--output-only-positives",
95 | "--http-request-concurrency", "1",
96 | "--http-request-timeout", "10000",
97 | "--timeout", timeout, #"00:03:00",
98 | "--scope-dom-depth-limit", "0",
99 | "--scope-directory-depth-limit", "0",
100 | 
"--scope-page-limit", "1", 101 | "--report-save-path", out_file, 102 | "--snapshot-save-path", "/dev/null", 103 | #"--http-proxy-type", "socks5", 104 | #"--http-proxy","127.0.0.1:9150" 105 | ] 106 | 107 | if self.audit_both_methods: 108 | cmd.append("--audit-with-both-methods") 109 | 110 | if request.referer: 111 | cmd.extend(['--http-request-header', 'Referer=%s' % request.referer]) 112 | 113 | if len(request.cookies) > 0: 114 | cmd.extend(["--http-cookie-string", "; ".join(["%s=%s" % (c.name,c.value) for c in request.cookies])]) 115 | 116 | cmd.append(request.url) 117 | 118 | if not self.execute_command: 119 | print cmd_to_str(self.settings['scanner_exe'] + cmd) 120 | self.exit(0) 121 | return False 122 | 123 | return cmd 124 | 125 | def scanner_executed(self, request, out, err, tmp_dir, cmd): 126 | out_file = tmp_dir + "/report" 127 | 128 | if not os.path.isfile(out_file): 129 | return 130 | 131 | json_file = tmp_dir + "/report.json" 132 | 133 | cmd = [self.reporter, "--reporter", "json:outfile=%s" % json_file, out_file] 134 | exe = CommandExecutor(cmd, True) 135 | out, err = exe.execute(30) 136 | 137 | if err: 138 | print ">>> error exporting arachni to json: %s %s" % (err, request.url) 139 | return 140 | 141 | if not os.path.isfile(json_file): 142 | return 143 | 144 | with open(json_file,'r') as fil: 145 | jsn = fil.read() 146 | 147 | report = [] 148 | try: 149 | report = json.loads(jsn) 150 | except Exception as e: 151 | print err 152 | 153 | issues = report['issues'] 154 | 155 | for i in issues: 156 | ref = i['references']['OWASP'] if i['references'] and 'OWASP' in i['references'] else "N/A" 157 | req = "N/A" 158 | req = None 159 | 160 | if 'request' in i: 161 | req = i['request'] 162 | elif 'variations' in i and len(i['variations']) > 0: 163 | req = i['variations'][0]['request'] 164 | 165 | 166 | fields = (i['name'], ref, i['severity'], req['headers_string'] if req else "N/A") 167 | descr = "D E T A I L S\n\nName: %s\nReference: %s\nSeverity: %s\n\n\nR E Q U E S T\n\n%s" % fields 168 | 169 | if req and req['method'] == "post": 170 | descr += "%s" % urllib.urlencode(req['body']) 171 | 172 | self.save_vulnerability(request, i['check']['shortname'], descr) 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /core/lib/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
11 | """ 12 | 13 | import os 14 | import pipes 15 | import posixpath 16 | import re 17 | import sys 18 | import time 19 | from urlparse import urlsplit, parse_qsl 20 | 21 | 22 | def get_program_infos(): 23 | infos = { 24 | "version": "1.0.1 - dev", 25 | "author_name": "Filippo Cavallarin", 26 | "author_email": "filippo.cavallarin@wearesegment.com" 27 | } 28 | 29 | return infos 30 | 31 | 32 | def generate_filename(name, ext=None, out_file_overwrite=False, ask_out_file_overwrite=False): 33 | def fname(): 34 | return ".".join([f for f in ft if f]) 35 | 36 | ft = name.split(".") 37 | 38 | if not ext and len(ft) > 1: 39 | ext = ft[-1] 40 | 41 | # remove extension if present in name and equal to ext 42 | if ft[-1] == ext: ft.pop() 43 | 44 | # always append ext, even if None 45 | ft.append(ext) 46 | 47 | if ask_out_file_overwrite and os.path.exists(fname()): 48 | try: 49 | sys.stdout.write("File %s already exists. Overwrite [y/N]: " % fname()) 50 | out_file_overwrite = sys.stdin.read(1) == "y" 51 | except KeyboardInterrupt: 52 | print "\nAborted" 53 | sys.exit(0) 54 | 55 | if not out_file_overwrite: 56 | bn = ft[-2] 57 | i = 1 58 | while os.path.exists(fname()): 59 | ft[-2] = "%s-%d" % (bn, i) 60 | i += 1 61 | 62 | return fname() 63 | 64 | 65 | def cmd_to_str(cmd): 66 | ecmd = [pipes.quote(o) for o in cmd] 67 | return " ".join(ecmd) 68 | 69 | 70 | def stdoutw(str): 71 | sys.stdout.write(str) 72 | sys.stdout.flush() 73 | 74 | 75 | def getrealdir(path): 76 | return os.path.dirname(os.path.realpath(path)) + os.sep 77 | 78 | 79 | def print_progressbar(tot, scanned, start_time, label): 80 | perc = (scanned * 33) / (tot if tot > 0 else 1) 81 | sys.stdout.write("\b" * 150) 82 | out = "[%s%s] %d of %d %s in %d minutes" % ( 83 | "=" * perc, " " * (33 - perc), scanned, tot, label, int(time.time() - start_time) / 60) 84 | stdoutw(out) 85 | 86 | 87 | def join_qsl(qs): 88 | """ 89 | join a list returned by parse_qsl 90 | do not use urlencode since it will encode values and not just join tuples 91 | """ 92 | return "&".join(["%s=%s" % (k, v) for k, v in qs]) 93 | 94 | 95 | # ?a=1&a=2&a=3 -> ?a=3, ?a[]=1&a[]=2&a[]=3 -> UNCHANGED 96 | def group_qs_params(url): 97 | purl = urlsplit(url) 98 | qs = parse_qsl(purl.query) 99 | nqs = list() 100 | 101 | for t in reversed(qs): 102 | if t[0].endswith("[]") or t[0] not in [f for f, _ in nqs]: 103 | nqs.append(t) 104 | 105 | purl = purl._replace(query=join_qsl(reversed(nqs))) 106 | 107 | return purl.geturl(); 108 | 109 | 110 | def normalize_url(url): 111 | # add http if scheme is not present 112 | # if an url like 'test.com:80//path' is passed to urlsplit the result is: 113 | # (scheme='test.com', path='80//path', ...) 114 | if not re.match("^[a-z]+://", url, re.I): 115 | url = "http://%s" % url 116 | 117 | purl = urlsplit(url) 118 | 119 | # no path and no query_string .. 
just ensure url ends with / 120 | if not purl.path: 121 | return "%s/" % purl.geturl() 122 | 123 | # group multiple / (path//to///file -> path/to/file) 124 | new_path = re.sub(r"/+", "/", purl.path) 125 | # normalize ../../../ 126 | new_path = posixpath.normpath(new_path) 127 | if purl.path.endswith('/') and not new_path.endswith('/'): 128 | new_path += '/' 129 | 130 | purl = purl._replace(path=new_path) 131 | 132 | return purl.geturl() 133 | 134 | 135 | def extract_http_auth(url): 136 | """ 137 | returns a tuple with httpauth string and the original url with http auth removed 138 | http://foo:bar@example.local -> (foo:bar, http://example.local) 139 | """ 140 | 141 | purl = urlsplit(url) 142 | if not purl.netloc: 143 | return (None, url) 144 | try: 145 | auth, netloc = purl.netloc.split("@", 1) 146 | except: 147 | return (None, url) 148 | 149 | purl = purl._replace(netloc=netloc) 150 | 151 | return (auth, purl.geturl()) 152 | 153 | 154 | def remove_tokens(query): 155 | """ 156 | tries to detect and remove tokens from a query string 157 | used to compare request ignoring, for example, CSRF tokens 158 | """ 159 | 160 | qs = parse_qsl(query) 161 | nqs = [] 162 | for k, v in qs: 163 | if len(v) < 32 or not re.match(r'^[a-z0-9\-_\.:=]+$', v, re.I): 164 | nqs.append((k, v)) 165 | 166 | return join_qsl(nqs) 167 | 168 | 169 | def get_probe_cmd(probe): 170 | standard_paths = [os.getcwd()] 171 | envpath = os.environ['PATH'].split(os.pathsep) 172 | exe_name = probe 173 | 174 | if sys.platform != "win32": 175 | # force check to standard paths in case $PATH is not set (ie crontab) 176 | standard_paths.extend(["/usr/bin", "/usr/local/bin", "/usr/share/bin"]) 177 | else: 178 | exe_name = "%s.exe" % exe_name 179 | 180 | exe_paths = ["%s%s%s" % (p, os.sep, exe_name) for p in standard_paths + envpath] 181 | 182 | for exe in exe_paths: 183 | if os.path.isfile(exe): 184 | return [exe, "--ignore-ssl-errors=yes", "--web-security=false", "--ssl-protocol=any", "--debug=false"] 185 | 186 | return None 187 | -------------------------------------------------------------------------------- /core/util/utilities/login/login.js: -------------------------------------------------------------------------------- 1 | /* 2 | HTCAP - htcap.org 3 | Author: filippo.cavallarin@wearesegment.com 4 | 5 | This program is free software; you can redistribute it and/or modify it under 6 | the terms of the GNU General Public License as published by the Free Software 7 | Foundation; either version 2 of the License, or (at your option) any later 8 | version. 9 | */ 10 | 11 | var system = require('system'); 12 | var fs = require('fs'); 13 | 14 | // ctrl-c from probe/function.js .. maybe I should create some common.js and put it somewhere .. but where?.. really.. where?? 15 | function getCookies(headers, url){ 16 | var a, b, c, ret = []; 17 | var purl = document.createElement('a'); 18 | purl.href = url; 19 | var domain = purl.hostname; 20 | 21 | for(a = 0; a < headers.length; a++){ 22 | //console.log(JSON.stringify(headers[a])) 23 | if(headers[a].name.toLowerCase() == "set-cookie"){ 24 | var cookies = headers[a].value.split("\n"); // phantomjs stores multiple cookies in this way .. 
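// A hedged illustration of the folded value the split above handles, e.g.:
//   "SESSID=abc123; Path=/; HttpOnly\nlang=en; Max-Age=3600"
// one Set-Cookie header per line; each line is then split on "; " into its
// name=value pair and attributes by the loop below.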
25 | for(b = 0; b < cookies.length; b++){
26 | var ck = cookies[b].split(/; */);
27 | var cookie = {domain: domain, path: "/", secure: false, httponly:false};
28 | for(c = 0; c < ck.length; c++){
29 | var kv = ck[c].split("=");
30 | if(c == 0){
31 | cookie.name = kv[0];
32 | cookie.value = decodeURIComponent(kv[1]);
33 | continue;
34 | }
35 | switch(kv[0].toLowerCase()){
36 | case "expires":
37 | if(!("expires" in cookie))
38 | cookie.expires = parseInt((new Date(kv[1])).getTime() / 1000);
39 | break;
40 | case "max-age":
41 | cookie.expires = parseInt(((new Date()).getTime() / 1000) + parseInt(kv[1]));
42 | break;
43 | case "domain":
44 | case "path":
45 | cookie[kv[0]] = kv[1];
46 | break;
47 | case "httponly":
48 | case "secure":
49 | cookie[kv[0]] = true;
50 | break;
51 | }
52 | }
53 | ret.push(cookie);
54 | }
55 | }
56 | }
57 | return ret;
58 | };
59 | 
60 | 
61 | 
62 | function print(mess){
63 | output.push(mess)
64 | }
65 | 
66 | if(system.args.length < 4){
67 | console.log("usage: login.js <url> <username> <password> [<button text>]");
68 | phantom.exit(0);
69 | }
70 | 
71 | 
72 | var step = 0;
73 | var allCookies = [];
74 | var output = []
75 | 
76 | var url = system.args[1];
77 | var login = system.args[2];
78 | var password = system.args[3];
79 | var buttonTxt = system.args[4] || null;
80 | 
81 | 
82 | var page = require('webpage').create();
83 | 
84 | page.onConsoleMessage = function(msg, lineNum, sourceId) {
85 | 
86 | //console.log("console: " + msg);
87 | }
88 | 
89 | page.onResourceReceived = function(response) {
90 | var cookies = getCookies(response.headers, url)
91 | for(var a = 0; a < cookies.length; a++){
92 | allCookies.push(cookies[a])
93 | }
94 | };
95 | 
96 | phantom.onError = function(msg, trace) {}
97 | page.onError = function(msg, trace) {}
98 | 
99 | page.onCallback = function(data) {
100 | switch(data.cmd){
101 | case "next":
102 | step = 2;
103 | //page.render("010101.png")
104 | return;
105 | case "end":
106 | console.log(JSON.stringify(output))
107 | phantom.exit(0);
108 | case "print":
109 | print(JSON.parse(data.par))
110 | return;
111 | }
112 | }
113 | 
114 | page.onLoadFinished = function(status) {
115 | if(status != 'success'){
116 | console.log("error loading page " + url);
117 | phantom.exit(1);
118 | }
119 | 
120 | if(step < 2)return;
121 | 
122 | if(allCookies.length > 0){
123 | print(allCookies);
124 | } else {
125 | print([]);
126 | }
127 | 
128 | page.evaluate(function(){
129 | var els = document.getElementsByTagName("a");
130 | var lu = [];
131 | var re = /.*(log|sign)(_|\-| )?(out|off).*/gi;
132 | for(var a = 0; a < els.length; a++){
133 | if(els[a].href.match(re) || els[a].innerText.match(re)){
134 | lu.push(els[a].href)
135 | }
136 | }
137 | if(lu.length > 0){
138 | callPhantom({cmd:"print",par:JSON.stringify(lu)});
139 | } else {
140 | callPhantom({cmd:"print",par:'[]'});
141 | }
142 | 
143 | callPhantom({cmd:"end"});
144 | })
145 | }
146 | 
147 | 
148 | 
149 | page.settings.loadImages = false;
150 | 
151 | page.open(url, {}, function(status){
152 | page.evaluate(function(login, password,buttonTxt){
153 | function trigger(el, evname){
154 | if ('createEvent' in document) {
155 | var evt = document.createEvent('HTMLEvents');
156 | evt.initEvent(evname, true, false);
157 | el.dispatchEvent(evt);
158 | } else {
159 | evname = 'on' + evname;
160 | if( evname in el && typeof el[evname] == "function"){
161 | el[evname]();
162 | }
163 | }
164 | };
165 | 
166 | function getAdiacent(cont, selector){
167 | var ad = null;
168 | 
169 | while(!ad && cont){
170 | ad = cont.querySelector(selector)
171 | cont =
cont.parentNode 172 | } 173 | return ad 174 | } 175 | 176 | var passw_el = document.querySelector("input[type=password]"); 177 | passw_el.value = password; 178 | var login_el = getAdiacent(passw_el, "input[type=text],input[type=email],input:not([type])"); 179 | var button_el = null; 180 | if(buttonTxt){ 181 | var els = document.getElementsByTagName("*"); 182 | for(var a = 0; a < els.length; a++){ 183 | for(var ch = els[a].firstChild; ch; ch = ch.nextSibling){ 184 | if(ch.nodeType != 3)continue; // skip non textNodes 185 | if(ch.nodeValue.toLowerCase().trim() == buttonTxt.toLowerCase()){ 186 | button_el = els[a]; 187 | break; 188 | } 189 | } 190 | } 191 | } else { 192 | button_el = getAdiacent(passw_el, "input[type=submit],button"); 193 | if(!button_el){ 194 | button_el = getAdiacent(passw_el, "a"); 195 | } 196 | } 197 | if(!login_el || ! button_el){ 198 | console.log("error") 199 | } 200 | login_el.value = login; 201 | trigger(login_el, "blur") 202 | trigger(login_el, "change") 203 | trigger(passw_el, "blur") 204 | trigger(passw_el, "change") 205 | 206 | setTimeout(function(){ 207 | trigger(button_el, "click"); 208 | callPhantom({cmd:"next"}); 209 | },50); 210 | 211 | },login, password, buttonTxt); 212 | }); 213 | 214 | -------------------------------------------------------------------------------- /core/lib/cookie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - www.htcap.org 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 11 | """ 12 | 13 | import cookielib 14 | import time 15 | from urllib import quote 16 | from urlparse import urlparse 17 | 18 | 19 | class Cookie: 20 | """ 21 | RFC 6265 22 | """ 23 | 24 | def __init__(self, cookie, setter=None): 25 | self.name = (str(cookie['name']) if 'name' in cookie and cookie['name'] else None) 26 | self.domain = (str(cookie['domain']) if 'domain' in cookie and cookie['domain'] else None) 27 | self.path = (str(cookie['path']) if 'path' in cookie and cookie['path'] else "/") 28 | 29 | # setter is the url that set this cookie, it's used to handle cookies with domain=None 30 | # if both domain and setter are None then no domain restrictions are applied (used when cookied are loaded from db) 31 | self.setter = urlparse(setter) if setter else None 32 | 33 | # if self.domain[0] != ".": 34 | # self.domain = "." 
+ self.domain 35 | 36 | self.update(cookie) 37 | 38 | def update(self, cookie): 39 | self.value = (quote(str(cookie['value'])) if 'value' in cookie and cookie['value'] else None) 40 | self.expires = (cookie['expires'] if 'expires' in cookie else None) 41 | self.secure = (cookie['secure'] if 'secure' in cookie else False) 42 | self.httponly = (cookie['httponly'] if 'httponly' in cookie else False) 43 | 44 | def __eq__(self, other): 45 | return (other 46 | and self.name == other.name 47 | and self.path == other.path 48 | and (self.domain == None or other.domain == None or self.domain == other.domain) 49 | ) 50 | 51 | def get_string(self): 52 | return "%s=%s; path = %s" % (self.name, self.value, self.path) # self.setter) 53 | 54 | # if domain is set it is valid for all subdomains 55 | # if domain is not set it is valid only for the setter's domain 56 | def is_valid_for_url(self, url): 57 | purl = urlparse(url) 58 | # the preceding dot in domain is optional so .foo.com and foo.com are the same 59 | if self.domain is None: 60 | # domain is considered the domain of setter 61 | # the cookie is valid if url's domain is EQUAL to setter's domain 62 | # if setter is None, no domain restrictions are applied (ie when loading cookies from db) 63 | if self.setter and purl.hostname != self.setter.hostname: return False 64 | else: 65 | if not purl.hostname: return False 66 | # url is valid ALSO if it is a subdomain of self.domain 67 | sh = [t for t in self.domain.split(".")[::-1] if t] # skip empty vals (in case of .foo.bar) 68 | uh = purl.hostname.split(".")[::-1] 69 | # @TODO DO NOT trust self.domain blindely .. check if = to setter... 70 | if uh[:len(sh)] != sh: return False 71 | 72 | if self.path: 73 | # check if url's path is equal or subfolder of self.path 74 | if not purl.path: return False 75 | sp = [t for t in self.path.split("/") if t] 76 | up = [t for t in purl.path.split("/") if t] 77 | if up[:len(sp)] != sp: return False 78 | 79 | # @TODO!!! 80 | if self.expires: 81 | pass 82 | 83 | # print "%s is valid for %s" % (self.get_string(), url) 84 | 85 | return True 86 | 87 | # def get_json(self): 88 | # return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True) 89 | 90 | def get_dict(self): 91 | return dict( 92 | name=self.name, 93 | value=self.value, 94 | domain=self.domain, 95 | path=self.path, 96 | secure=self.secure, 97 | expires=self.expires, 98 | httponly=self.httponly 99 | ) 100 | 101 | def get_cookielib_cookie(self): 102 | return cookielib.Cookie( 103 | version=0, 104 | name=self.name, 105 | value=self.value, 106 | port=None, 107 | port_specified=False, 108 | domain=self.domain if self.domain else "", # is this ok? 109 | domain_specified=True, 110 | domain_initial_dot=False, 111 | path=self.path, 112 | path_specified=True, 113 | secure=self.secure, 114 | expires=self.expires, 115 | discard=True, 116 | comment=None, 117 | comment_url=None, 118 | rest=None 119 | ) 120 | 121 | def get_as_netscape(self): 122 | """ 123 | 7 tab delimited properties: 124 | domain - The domain that created AND that can read the variable. 125 | flag - A TRUE/FALSE value indicating if all machines within a given domain can access 126 | the variable. This value is set automatically by the browser, depending on the value you set for domain. 127 | path - The path within the domain that the variable is valid for. 128 | secure - A TRUE/FALSE value indicating if a secure connection with the domain is needed to access the variable. 129 | expiration - The UNIX time that the variable will expire on. 
UNIX time is defined as the number of seconds since Jan 1, 1970 00:00:00 GMT.
130 | name - The name of the variable.
131 | value - The value of the variable.
132 | """
133 | 
134 | domain = self.domain
135 | if domain:
136 | if not domain.startswith("."): domain = ".%s" % domain
137 | else:
138 | domain = self.setter.hostname if self.setter else "."
139 | 
140 | # @TODO figure out how and whether to set 'flag'
141 | flag = "TRUE"
142 | # @TODO it's unclear what to set when the cookie has no expire date .. for now set it in the future and move on
143 | expiry = self.expires if self.expires else (time.time() + (3600 * 24 * 7))
144 | values = (domain, flag, self.path, ("TRUE" if self.secure else "FALSE"), expiry, self.name, self.value)
145 | return "%s\t%s\t%s\t%s\t%d\t%s\t%s" % values
146 | 
147 | def __str__(self):
148 | return "Cookie: %s=%s" % (self.name, self.value)
149 | 
--------------------------------------------------------------------------------
/core/lib/thirdparty/simhash/__init__.py:
--------------------------------------------------------------------------------
1 | # Created by 1e0n in 2013
2 | from __future__ import division, unicode_literals
3 | 
4 | import sys
5 | import re
6 | import hashlib
7 | import logging
8 | import collections
9 | from itertools import groupby
10 | 
11 | if sys.version_info[0] >= 3:
12 | basestring = str
13 | unicode = str
14 | long = int
15 | else:
16 | range = xrange
17 | 
18 | 
19 | class Simhash(object):
20 | 
21 | def __init__(self, value, f=64, reg=r'[\w\u4e00-\u9fcc]+', hashfunc=None):
22 | """
23 | `f` is the dimensions of fingerprints
24 | 
25 | `reg` is meaningful only when `value` is basestring and describes
26 | what is considered to be a letter inside parsed string. Regexp
27 | object can also be specified (some attempt to handle any letters
28 | is to specify reg=re.compile(r'\w', re.UNICODE))
29 | 
30 | `hashfunc` accepts a utf-8 encoded string and returns a unsigned
31 | integer in at least `f` bits.
32 | """
33 | 
34 | self.f = f
35 | self.reg = reg
36 | self.value = None
37 | 
38 | if hashfunc is None:
39 | def _hashfunc(x):
40 | return int(hashlib.md5(x).hexdigest(), 16)
41 | 
42 | self.hashfunc = _hashfunc
43 | else:
44 | self.hashfunc = hashfunc
45 | 
46 | if isinstance(value, Simhash):
47 | self.value = value.value
48 | elif isinstance(value, basestring):
49 | self.build_by_text(unicode(value))
50 | elif isinstance(value, collections.Iterable):
51 | self.build_by_features(value)
52 | elif isinstance(value, long):
53 | self.value = value
54 | else:
55 | raise Exception('Bad parameter with type {}'.format(type(value)))
56 | 
57 | def _slide(self, content, width=4):
58 | return [content[i:i + width] for i in range(max(len(content) - width + 1, 1))]
59 | 
60 | def _tokenize(self, content):
61 | content = content.lower()
62 | content = ''.join(re.findall(self.reg, content))
63 | ans = self._slide(content)
64 | return ans
65 | 
66 | def build_by_text(self, content):
67 | features = self._tokenize(content)
68 | features = {k:sum(1 for _ in g) for k, g in groupby(sorted(features))}
69 | return self.build_by_features(features)
70 | 
71 | def build_by_features(self, features):
72 | """
73 | `features` might be a list of unweighted tokens (a weight of 1
74 | will be assumed), a list of (token, weight) tuples or
75 | a token -> weight dict.
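e.g. ['abcd', 'bcde'], [('abcd', 3), ('bcde', 1)] or {'abcd': 3, 'bcde': 1}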
76 | """ 77 | v = [0] * self.f 78 | masks = [1 << i for i in range(self.f)] 79 | if isinstance(features, dict): 80 | features = features.items() 81 | for f in features: 82 | if isinstance(f, basestring): 83 | h = self.hashfunc(f.encode('utf-8')) 84 | w = 1 85 | else: 86 | assert isinstance(f, collections.Iterable) 87 | h = self.hashfunc(f[0].encode('utf-8')) 88 | w = f[1] 89 | for i in range(self.f): 90 | v[i] += w if h & masks[i] else -w 91 | ans = 0 92 | for i in range(self.f): 93 | if v[i] >= 0: 94 | ans |= masks[i] 95 | self.value = ans 96 | 97 | def distance(self, another): 98 | assert self.f == another.f 99 | x = (self.value ^ another.value) & ((1 << self.f) - 1) 100 | ans = 0 101 | while x: 102 | ans += 1 103 | x &= x - 1 104 | return ans 105 | 106 | 107 | class SimhashIndex(object): 108 | 109 | def __init__(self, objs, f=64, k=2): 110 | """ 111 | `objs` is a list of (obj_id, simhash) 112 | obj_id is a string, simhash is an instance of Simhash 113 | `f` is the same with the one for Simhash 114 | `k` is the tolerance 115 | """ 116 | self.k = k 117 | self.f = f 118 | count = len(objs) 119 | logging.info('Initializing %s data.', count) 120 | 121 | self.bucket = collections.defaultdict(set) 122 | 123 | for i, q in enumerate(objs): 124 | if i % 10000 == 0 or i == count - 1: 125 | logging.info('%s/%s', i + 1, count) 126 | 127 | self.add(*q) 128 | 129 | def get_near_dups(self, simhash): 130 | """ 131 | `simhash` is an instance of Simhash 132 | return a list of obj_id, which is in type of str 133 | """ 134 | assert simhash.f == self.f 135 | 136 | ans = set() 137 | 138 | for key in self.get_keys(simhash): 139 | dups = self.bucket[key] 140 | logging.debug('key:%s', key) 141 | if len(dups) > 200: 142 | logging.warning('Big bucket found. key:%s, len:%s', key, len(dups)) 143 | 144 | for dup in dups: 145 | sim2, obj_id = dup.split(',', 1) 146 | sim2 = Simhash(long(sim2, 16), self.f) 147 | 148 | d = simhash.distance(sim2) 149 | if d <= self.k: 150 | ans.add(obj_id) 151 | return list(ans) 152 | 153 | def add(self, obj_id, simhash): 154 | """ 155 | `obj_id` is a string 156 | `simhash` is an instance of Simhash 157 | """ 158 | assert simhash.f == self.f 159 | 160 | for key in self.get_keys(simhash): 161 | v = '%x,%s' % (simhash.value, obj_id) 162 | self.bucket[key].add(v) 163 | 164 | def delete(self, obj_id, simhash): 165 | """ 166 | `obj_id` is a string 167 | `simhash` is an instance of Simhash 168 | """ 169 | assert simhash.f == self.f 170 | 171 | for key in self.get_keys(simhash): 172 | v = '%x,%s' % (simhash.value, obj_id) 173 | if v in self.bucket[key]: 174 | self.bucket[key].remove(v) 175 | 176 | @property 177 | def offsets(self): 178 | """ 179 | You may optimize this method according to 180 | """ 181 | return [self.f // (self.k + 1) * i for i in range(self.k + 1)] 182 | 183 | def get_keys(self, simhash): 184 | for i, offset in enumerate(self.offsets): 185 | if i == (len(self.offsets) - 1): 186 | m = 2 ** (self.f - offset) - 1 187 | else: 188 | m = 2 ** (self.offsets[i + 1] - offset) - 1 189 | c = simhash.value >> offset & m 190 | yield '%x:%x' % (c, i) 191 | 192 | def bucket_size(self): 193 | return len(self.bucket) 194 | -------------------------------------------------------------------------------- /core/scan/base_scanner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it 
under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 11 | """ 12 | 13 | from __future__ import unicode_literals 14 | import sys 15 | import time 16 | import re 17 | import json 18 | import urllib 19 | import cookielib 20 | import threading 21 | import base64 22 | import posixpath 23 | import tempfile 24 | import os 25 | import uuid 26 | import urllib2 27 | import shutil 28 | import datetime 29 | 30 | from urlparse import urlparse, urlsplit, urljoin, parse_qs 31 | 32 | import core.lib.thirdparty.pysocks.socks as socks 33 | from core.lib.thirdparty.pysocks.sockshandler import SocksiPyHandler 34 | 35 | from core.lib.exception import * 36 | from core.crawl.lib.shared import * 37 | 38 | 39 | from core.lib.request import Request 40 | 41 | from core.lib.cookie import Cookie 42 | 43 | 44 | from core.lib.shell import CommandExecutor 45 | from core.lib.database import Database 46 | 47 | from core.lib.utils import * 48 | from core.constants import * 49 | 50 | from core.lib.request_pattern import RequestPattern 51 | 52 | 53 | 54 | class BaseScanner: 55 | def __init__(self, db_file, num_threads, request_types, process_timeout, scanner_exe, display_progress, scanner_argv): 56 | self.scan_start_time = int(time.time()) 57 | self.threads = [] 58 | self._th_lock = threading.Lock() 59 | self._th_lock_db = threading.Lock() 60 | self.performed_requests = 0 61 | self._urlpatterns = [] 62 | self._exitcode = 0 63 | self.scanner_name = self.__class__.__name__.lower() 64 | self._running = False 65 | self.settings = self.get_settings() 66 | 67 | #override default settings 68 | if num_threads: self.settings['num_threads'] = num_threads 69 | if request_types: self.settings['request_types'] = request_types 70 | if process_timeout: self.settings['process_timeout'] = process_timeout 71 | if scanner_exe: self.settings['scanner_exe'] = scanner_exe 72 | self.settings['scanner_exe'] = self.settings['scanner_exe'].split(" ") 73 | 74 | 75 | 76 | self.db = Database(db_file) 77 | self.id_assessment = self.db.create_assessment(self.scanner_name, int(time.time())) 78 | self.pending_requests = self.db.get_requests(self.settings['request_types']) 79 | self.tot_requests = len(self.pending_requests) 80 | self._duplicated_requests = [] 81 | 82 | urlpatterns = [] 83 | for req in self.pending_requests: 84 | patt = RequestPattern(req).pattern 85 | if patt in urlpatterns: 86 | self._duplicated_requests.append(req.db_id) 87 | else: 88 | urlpatterns.append(patt) 89 | 90 | init = self.init(scanner_argv if scanner_argv else []) 91 | 92 | self._running = True 93 | print "Scanner %s started with %d threads" % (self.scanner_name, self.settings['num_threads']) 94 | 95 | for n in range(0, self.settings['num_threads']): 96 | thread = self.Executor(self) 97 | self.threads.append(thread) 98 | thread.start() 99 | 100 | try: 101 | self.wait_executor(self.threads, display_progress) 102 | except KeyboardInterrupt: 103 | print "\nTerminated by user" 104 | self.kill_threads() 105 | 106 | self.save_assessment() 107 | sys.exit(self._exitcode) 108 | 109 | 110 | def get_settings(self): 111 | return dict( 112 | request_types = "xhr,link,redirect,form,json", 113 | num_threads = 10, 114 | process_timeout = 120, 115 | scanner_exe = "" 116 | ) 117 | 118 | 119 | def get_cmd(self, url, outfile): 120 | cmd = [] 121 | return cmd 122 | 123 | 124 | def scanner_executed(self, id_parent, out, err, out_file): 125 | return 126 | 127 | 128 | def 
wait_executor(self, threads, display_progress): 129 | executor_done = False 130 | while not executor_done: 131 | executor_done = True 132 | for th in threads: 133 | if th.isAlive(): 134 | executor_done = False 135 | th.join(1) 136 | 137 | if display_progress: 138 | self._th_lock.acquire() 139 | scanned = self.performed_requests 140 | pending = len(self.pending_requests) 141 | tot = self.tot_requests 142 | self._th_lock.release() 143 | 144 | print_progressbar(tot, scanned, self.scan_start_time, "requests scanned") 145 | if display_progress: 146 | print "" 147 | 148 | 149 | def kill_threads(self): 150 | self._th_lock.acquire() 151 | for th in self.threads: 152 | if th.isAlive(): th.exit = True 153 | self._th_lock.release() 154 | 155 | 156 | def exit(self, code): 157 | if self._running: 158 | self._th_lock.acquire() 159 | self._exitcode = code 160 | self._th_lock.release() 161 | self.kill_threads() 162 | print "kill thread" 163 | print "" 164 | else : 165 | sys.exit(code) 166 | 167 | 168 | def save_vulnerability(self, request, type, description): 169 | self._th_lock_db.acquire() 170 | self.db.insert_vulnerability(self.id_assessment, request.db_id, type, description) 171 | self._th_lock_db.release() 172 | 173 | 174 | def save_assessment(self): 175 | self._th_lock_db.acquire() 176 | self.db.save_assessment(self.id_assessment, int(time.time())) 177 | self._th_lock_db.release() 178 | 179 | 180 | def is_request_duplicated(self, request): 181 | return request.db_id in self._duplicated_requests 182 | 183 | 184 | class Executor(threading.Thread): 185 | 186 | def __init__(self, scanner): 187 | threading.Thread.__init__(self) 188 | self.scanner = scanner 189 | self.exit = False 190 | self.thread_uuid = uuid.uuid4() 191 | self.tmp_dir = "%s%shtcap_tempdir-%s" % (tempfile.gettempdir(), os.sep, self.thread_uuid) 192 | os.makedirs(self.tmp_dir, 0700) 193 | 194 | def inc_counter(self): 195 | self.scanner._th_lock.acquire() 196 | self.scanner.performed_requests += 1 197 | self.scanner._th_lock.release() 198 | 199 | def run(self): 200 | req = None 201 | while True: 202 | 203 | self.scanner._th_lock.acquire() 204 | if self.exit == True or len(self.scanner.pending_requests) == 0: 205 | self.scanner._th_lock.release() 206 | shutil.rmtree(self.tmp_dir) 207 | return 208 | 209 | req = self.scanner.pending_requests.pop() 210 | 211 | self.scanner._th_lock.release() 212 | 213 | 214 | cmd_options = self.scanner.get_cmd(req, self.tmp_dir) 215 | if cmd_options == False: 216 | self.inc_counter() 217 | continue 218 | 219 | cmd = self.scanner.settings['scanner_exe'] + cmd_options 220 | 221 | 222 | exe = CommandExecutor(cmd, True) 223 | out, err = exe.execute(self.scanner.settings['process_timeout']) 224 | # if err: print "\nError: \n%s\n%s\n%s\n" % (err," ".join(cmd),out) 225 | 226 | self.inc_counter() 227 | 228 | self.scanner.scanner_executed(req, out,err, self.tmp_dir, cmd) 229 | 230 | -------------------------------------------------------------------------------- /core/util/utilities/report.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | """ 5 | HTCAP - beta 1 6 | Author: filippo.cavallarin@wearesegment.com 7 | 8 | This program is free software; you can redistribute it and/or modify it under 9 | the terms of the GNU General Public License as published by the Free Software 10 | Foundation; either version 2 of the License, or (at your option) any later 11 | version. 
12 | """ 13 | 14 | import sys 15 | import os 16 | import sqlite3 17 | import json 18 | from urlparse import urlsplit 19 | from core.util.base_util import BaseUtil 20 | from core.lib.utils import * 21 | reload(sys) 22 | sys.setdefaultencoding('utf8') 23 | 24 | 25 | 26 | class Report(BaseUtil): 27 | 28 | def dict_from_row(self, row): 29 | return dict(zip(row.keys(), row)) 30 | 31 | @staticmethod 32 | def get_settings(): 33 | return dict( 34 | descr = "Generate the html report", 35 | optargs = '', 36 | minargs = 2 37 | ) 38 | 39 | def usage(self): 40 | return ( 41 | "%s\n" 42 | "usage: %s \n" 43 | % (self.get_settings()['descr'], self.utilname) 44 | ) 45 | 46 | 47 | def get_report(self, cur): 48 | report = [] 49 | qry = """ 50 | SELECT r.type,r.id,r.url,r.method,r.data,r.http_auth,r.referer,r.out_of_scope, ri.trigger, r.crawler_errors, 51 | (ri.id is not null) AS has_requests, ri.type AS req_type,ri.method AS req_method,ri.url AS req_url,ri.data AS req_data 52 | FROM request r 53 | LEFT JOIN request_child rc ON r.id=rc.id_request 54 | LEFT JOIN request ri ON ri.id = rc.id_child 55 | WHERE 56 | r.type IN ('link', 'redirect','form') 57 | and (has_requests=0 OR req_type IN ('xhr','form','websocket') OR (req_type='jsonp' AND ri.trigger <> '')) 58 | """ 59 | try: 60 | cur.execute(qry) 61 | for r in cur.fetchall(): 62 | report.append(self.dict_from_row(r)) 63 | except Exception as e: 64 | print str(e) 65 | 66 | return report 67 | 68 | def get_assessment_vulnerabilities(self, cur, id_request): 69 | report = [] 70 | qry = """ 71 | SELECT type, description FROM vulnerability WHERE id_request IN ( 72 | SELECT id FROM request WHERE ( 73 | id=? AND type IN ('link','redirect')) OR 74 | (id_parent=? AND type IN ('xhr','jsonp','form','websocket') 75 | ) 76 | ) 77 | """ 78 | 79 | try: 80 | 81 | cur.execute(qry, (id_request,id_request)) 82 | for r in cur.fetchall(): 83 | report.append(json.dumps({"type":r['type'], "description":r['description']})) 84 | except Exception as e: 85 | print str(e) 86 | 87 | 88 | return report 89 | 90 | 91 | def get_crawl_info(self, cur): 92 | crawl = None 93 | qry = """ 94 | SELECT *, 95 | (SELECT htcap_version FROM crawl_info) AS htcap_version, 96 | (SELECT COUNT(*) FROM request WHERE crawled=1) AS pages_crawled 97 | FROM crawl_info 98 | """ 99 | 100 | try: 101 | 102 | cur.execute(qry) 103 | crawl = self.dict_from_row(cur.fetchone()) 104 | except Exception as e: 105 | print str(e) 106 | 107 | return crawl 108 | 109 | def get_request_cmp_tuple(self, row): 110 | # http_auth in included in the url 111 | return (row['url'], row['method'], row['data']) 112 | 113 | def add_http_auth(self, url, auth): 114 | purl = urlsplit(url) 115 | return purl._replace(netloc="%s@%s" % (auth, purl.netloc)).geturl() 116 | 117 | def get_json(self, cur): 118 | report = self.get_report(cur) 119 | infos = self.get_crawl_info(cur) 120 | 121 | 122 | ret = dict( 123 | infos= infos, 124 | results = [] 125 | ) 126 | 127 | for row in report: 128 | if row['http_auth']: 129 | row['url'] = self.add_http_auth(row['url'], row['http_auth']) 130 | 131 | if self.get_request_cmp_tuple(row) in [self.get_request_cmp_tuple(r) for r in ret['results']]: continue 132 | d = dict( 133 | id = row['id'], 134 | url = row['url'], 135 | method = row['method'], 136 | data = row['data'], 137 | referer = row['referer'], 138 | xhr = [], 139 | jsonp = [], 140 | websockets = [], 141 | forms = [], 142 | errors = json.loads(row['crawler_errors']) if row['crawler_errors'] else [], 143 | vulnerabilities = 
145 | if row['out_of_scope']: d['out_of_scope'] = True 146 | 147 | if row['has_requests']: 148 | for r in report: 149 | if r['id'] != row['id']: continue 150 | req_obj = {} 151 | 152 | trigger = json.loads(r['trigger']) if 'trigger' in r and r['trigger'] else None # {'event':'ready','element':'[document]'} 153 | req_obj['trigger'] = "%s.%s()" % (trigger['element'], trigger['event']) if trigger else "" 154 | 155 | if r['req_type']=='xhr': 156 | req_obj['request'] = ["%s %s" % (r['req_method'], r['req_url'])] 157 | if r['req_data']: req_obj['request'].append(r['req_data']) 158 | d['xhr'].append(req_obj) 159 | 160 | elif r['req_type']=='jsonp': 161 | req_obj['request'] = r['req_url'] 162 | d['jsonp'].append(req_obj) 163 | 164 | elif r['req_type']=='websocket': 165 | req_obj['request'] = r['req_url'] 166 | d['websockets'].append(req_obj) 167 | 168 | elif r['req_type']=='form': 169 | #req_obj['request'] = "%s %s data:%s" % (r['req_method'], r['req_url'], r['req_data']) 170 | req_obj['request'] = ["%s %s" % (r['req_method'], r['req_url'])] 171 | if r['req_data']: req_obj['request'].append(r['req_data']) 172 | d['forms'].append(req_obj) 173 | 174 | 175 | if row['has_requests'] or row['out_of_scope'] or len(d['errors']) > 0 or len(d['vulnerabilities']) > 0: 176 | ret['results'].append(d) 177 | 178 | return json.dumps(ret) 179 | 180 | 181 | 182 | 183 | 184 | 185 | def main(self, args, opts): 186 | 187 | base_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep + "htmlreport" + os.sep 188 | 189 | # if len(args) < 3: 190 | # print "usage: %s <dbfile> <outfile>" % args[0] 191 | # sys.exit(1) 192 | 193 | dbfile = args[0] 194 | outfile = args[1] 195 | 196 | if not os.path.exists(dbfile): 197 | print "No such file: %s" % dbfile 198 | sys.exit(1) 199 | 200 | if os.path.exists(outfile): 201 | sys.stdout.write("File %s already exists. Overwrite [y/N]: " % outfile) 202 | if sys.stdin.read(1) != "y": 203 | sys.exit(1) 204 | os.remove(outfile) 205 | 206 | conn = sqlite3.connect(dbfile) 207 | conn.row_factory = sqlite3.Row 208 | cur = conn.cursor() 209 | 210 | base_html = ( 211 | "<html>\n" 212 | "<head>\n" 213 | "<style>%s</style>\n" 214 | "<script>%s</script>\n" 215 | "<script>%s</script>\n" 216 | "</head>\n" 217 | "%s\n" 218 | "</html>\n" 219 | ) 220 | 221 | 222 | jsn = "var report = %s;\n" % self.get_json(cur) 223 | 224 | with open("%sreport.html" % base_dir) as html, open("%sreport.js" % base_dir) as js, open("%sstyle.css" % base_dir) as css: 225 | html = base_html % (css.read(), jsn, js.read(), html.read()) 226 | 227 | with open(outfile,'w') as out: 228 | out.write(html) 229 | 230 | print "Report saved to %s" % outfile 231 | 232 | -------------------------------------------------------------------------------- /core/crawl/crawler_thread.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version.
11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | import json 16 | import os 17 | import tempfile 18 | import threading 19 | import uuid 20 | from time import sleep 21 | 22 | from core.constants import * 23 | from core.crawl.lib.crawl_result import CrawlResult 24 | from core.crawl.lib.probe import Probe 25 | from core.crawl.lib.shared import Shared 26 | from core.crawl.lib.utils import adjust_requests 27 | from core.lib.exception import ThreadExitRequestException 28 | from core.lib.http_get import HttpGet 29 | from core.lib.shell import CommandExecutor 30 | 31 | 32 | # TODO: use NamedTemporaryFile for self._cookie_file 33 | # from core.lib.utils import cmd_to_str 34 | 35 | 36 | class CrawlerThread(threading.Thread): 37 | _PROCESS_RETRIES_INTERVAL = 0.5 38 | _PROCESS_RETRIES = 2 39 | 40 | def __init__(self): 41 | threading.Thread.__init__(self) 42 | 43 | self.status = THSTAT_RUNNING 44 | self.exit = False 45 | 46 | self._thread_uuid = uuid.uuid4() 47 | self._cookie_file = "%s%shtcap_cookiefile-%s.json" % (tempfile.gettempdir(), os.sep, self._thread_uuid) 48 | 49 | def run(self): 50 | self._crawl() 51 | 52 | def _crawl(self): 53 | 54 | while True: 55 | requests = [] 56 | errors = [] 57 | 58 | try: 59 | request = self._wait_request() 60 | except ThreadExitRequestException: 61 | if os.path.exists(self._cookie_file): 62 | os.remove(self._cookie_file) 63 | return 64 | except Exception as e: 65 | print("-->" + str(e)) 66 | continue 67 | 68 | probe = self._send_probe(request, errors) 69 | 70 | if probe: 71 | if probe.status == "ok" or probe.errcode == ERROR_PROBE_TO: 72 | 73 | requests = probe.requests 74 | if len(probe.user_output) > 0: 75 | request.user_output = probe.user_output 76 | 77 | # if the probe return some cookies set it has the last one 78 | if probe.cookies: 79 | Shared.end_cookies = probe.cookies 80 | 81 | else: 82 | errors.append(ERROR_PROBEFAILURE) 83 | # get urls with python to continue crawling 84 | if not Shared.options['use_urllib_onerror']: 85 | continue 86 | try: 87 | hr = HttpGet(request, Shared.options['process_timeout'], CrawlerThread._PROCESS_RETRIES, 88 | Shared.options['user_agent'], Shared.options['proxy']) 89 | requests = hr.get_requests() 90 | except Exception as e: 91 | errors.append(str(e)) 92 | 93 | # set out_of_scope, apply user-supplied filters to urls (ie group_qs) 94 | adjust_requests(requests) 95 | 96 | Shared.main_condition.acquire() 97 | res = CrawlResult(request, requests, errors) 98 | Shared.crawl_results.append(res) 99 | Shared.main_condition.notify() 100 | Shared.main_condition.release() 101 | 102 | def _wait_request(self): 103 | Shared.th_condition.acquire() 104 | while True: 105 | if self.exit: 106 | Shared.th_condition.notifyAll() 107 | Shared.th_condition.release() 108 | raise ThreadExitRequestException("exit request received") 109 | 110 | if Shared.requests_index >= len(Shared.requests): 111 | self.status = THSTAT_WAITING 112 | # The wait method releases the lock, blocks the current thread until another thread calls notify 113 | Shared.th_condition.wait() 114 | continue 115 | 116 | request = Shared.requests[Shared.requests_index] 117 | Shared.requests_index += 1 118 | 119 | break 120 | 121 | Shared.th_condition.release() 122 | 123 | self.status = THSTAT_RUNNING 124 | 125 | return request 126 | 127 | def _set_probe_params(self, request): 128 | params = [] 129 | cookies = [] 130 | url = request.url 131 | 132 | if request.method == "POST": 133 | params.append("-P") 134 | if request.data: 135 | params.extend(("-D", request.data)) 
136 | 137 | if len(request.cookies) > 0: 138 | for cookie in request.cookies: 139 | cookies.append(cookie.get_dict()) 140 | params.extend(("-c", json.dumps(cookies))) 141 | 142 | if request.http_auth: 143 | params.extend(("-p", request.http_auth)) 144 | 145 | if Shared.options['set_referer'] and request.referer: 146 | params.extend(("-r", request.referer)) 147 | 148 | # DEBUG: 149 | # params.append("-vv") 150 | params.append(url) 151 | 152 | return params 153 | 154 | def _send_probe(self, request, errors): 155 | 156 | probe = None 157 | retries = CrawlerThread._PROCESS_RETRIES 158 | params = self._set_probe_params(request) 159 | 160 | while retries: 161 | # DEBUG: 162 | # print("### INPUT: %s" % repr(Shared.probe_cmd + params)) 163 | cmd = CommandExecutor(Shared.probe_cmd + params) 164 | jsn = cmd.execute(Shared.options['process_timeout'] + 2) 165 | 166 | # DEBUG: 167 | # print("### OUTPUT: %s" % repr(jsn)) 168 | 169 | if jsn is None: 170 | errors.append(ERROR_PROBEKILLED) 171 | sleep(CrawlerThread._PROCESS_RETRIES_INTERVAL) # ... ??? 172 | retries -= 1 173 | continue 174 | else: 175 | probe_array = self._load_probe_json(jsn) 176 | 177 | if probe_array: 178 | probe = Probe(probe_array, request) 179 | 180 | if probe.status == "ok": 181 | break 182 | 183 | errors.append(probe.errcode) 184 | 185 | if probe.errcode in (ERROR_CONTENTTYPE, ERROR_PROBE_TO, ERROR_FORCE_STOP): 186 | break 187 | 188 | sleep(CrawlerThread._PROCESS_RETRIES_INTERVAL) 189 | retries -= 1 190 | return probe 191 | 192 | @staticmethod 193 | def _load_probe_json(jsn): 194 | 195 | if isinstance(jsn, tuple): 196 | jsn = jsn[0] 197 | 198 | try: 199 | data = json.loads(jsn) 200 | return data 201 | except ValueError: 202 | print "-- JSON DECODE ERROR %s" % jsn 203 | except Exception: 204 | raise 205 | -------------------------------------------------------------------------------- /core/lib/request.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
11 | """ 12 | 13 | import json 14 | import re 15 | from urlparse import urljoin, urlsplit 16 | 17 | from core.constants import * 18 | from core.lib.cookie import Cookie 19 | from core.lib.thirdparty.simhash import Simhash 20 | from core.lib.utils import extract_http_auth, normalize_url, remove_tokens 21 | 22 | 23 | class Request(object): 24 | def __init__(self, type, method, url, parent=None, referer=None, data=None, trigger=None, json_cookies=None, 25 | set_cookie=None, http_auth=None, db_id=None, parent_db_id=None, out_of_scope=None): 26 | self.type = type 27 | self.method = method 28 | self._html = None 29 | self._html_hash = None 30 | self.user_output = [] 31 | url = url.strip() 32 | 33 | try: 34 | url = url.decode("utf-8") 35 | except: 36 | try: 37 | url = url.decode("latin-1") 38 | except Exception as e: 39 | raise AssertionError("unable to decode " + url) 40 | 41 | if type != REQTYPE_UNKNOWN: 42 | # extract http auth if present in url 43 | # if credentials are present in url, the url IS absolute so we can do this before urljoin 44 | # (foo:bar@example.local is NOT A VALID URL) 45 | auth, nurl = extract_http_auth(url) 46 | if auth: 47 | if not http_auth: 48 | http_auth = auth 49 | url = nurl 50 | 51 | self.url = normalize_url(urljoin(parent.url, url) if parent else url) 52 | else: 53 | self.url = url 54 | 55 | # parent is the parent request that can be a redirect, referer is the referer page (ahead of redirects) 56 | self._parent = parent 57 | 58 | self.data = data if data else "" 59 | self.trigger = trigger 60 | self.db_id = db_id 61 | self.parent_db_id = parent_db_id 62 | self.out_of_scope = out_of_scope 63 | self.cookies = [] 64 | 65 | self.http_auth = parent.http_auth if not http_auth and parent else http_auth 66 | 67 | self.redirects = parent.redirects + 1 if type == REQTYPE_REDIRECT and parent else 0 68 | 69 | if not referer and parent: 70 | self.referer = parent.url if type != REQTYPE_REDIRECT else parent.referer 71 | else: 72 | self.referer = referer 73 | 74 | # if type == "unknown": 75 | # return 76 | 77 | if json_cookies: 78 | self.all_cookies = self.cookies_from_json(json_cookies) 79 | else: 80 | set_cookie = set_cookie if set_cookie else [] 81 | self.all_cookies = self.merge_cookies(set_cookie, parent.all_cookies) if parent else set_cookie 82 | 83 | self.cookies = [c for c in self.all_cookies if c.is_valid_for_url(self.url)] 84 | 85 | @property 86 | def parent(self): 87 | if not self._parent and self.parent_db_id: 88 | # fetch from db 89 | pass 90 | return self._parent 91 | 92 | @parent.setter 93 | def parent(self, value): 94 | self._parent = value 95 | 96 | @property 97 | def html(self): 98 | return self._html 99 | 100 | @html.setter 101 | def html(self, value): 102 | self._html = value 103 | self._html_hash = Simhash(value) 104 | 105 | def get_dict(self): 106 | return dict( 107 | type=self.type, 108 | method=self.method, 109 | url=self.url, 110 | referer=self.referer, 111 | data=self.data, 112 | trigger=self.trigger, 113 | cookies=self.cookies, 114 | db_id=self.db_id, 115 | parent_db_id=self.parent_db_id, 116 | out_of_scope=self.out_of_scope 117 | ) 118 | 119 | def cookies_from_json(self, cookies): 120 | # return [Cookie(c, self.parent.url) for c in json.loads(cookies)] 121 | 122 | # create Cookie without "setter" because cookies loaded from db are always valid (no domain restrictions) 123 | # see Cookie.py 124 | return [Cookie(c) for c in json.loads(cookies)] 125 | 126 | def get_cookies_as_json(self): 127 | cookies = [c.get_dict() for c in self.cookies] 128 | return 
json.dumps(cookies) 129 | 130 | def merge_cookies(self, cookies1, cookies2): 131 | cookies = list(cookies2) 132 | for parent_cookie in cookies1: 133 | if parent_cookie not in cookies: 134 | cookies.append(parent_cookie) 135 | else: 136 | for cookie in cookies: 137 | if parent_cookie == cookie: 138 | cookie.update(parent_cookie.__dict__) 139 | 140 | return cookies 141 | 142 | def get_full_url(self): 143 | """ 144 | returns the url with http credentials 145 | """ 146 | if not self.http_auth: 147 | return self.url 148 | 149 | purl = urlsplit(self.url) 150 | netloc = "%s@%s" % (self.http_auth, purl.netloc) 151 | purl = purl._replace(netloc=netloc) 152 | 153 | return purl.geturl() 154 | 155 | # UNUSED 156 | def tokenize_request(self, request): 157 | """ 158 | returns an array of url components 159 | """ 160 | purl = urlsplit(request.url) 161 | 162 | tokens = [purl.scheme, purl.netloc] 163 | 164 | if purl.path: 165 | tokens.extend(purl.path.split("/")) 166 | 167 | data = [purl.query] if purl.query else [] 168 | 169 | if request.data: 170 | data.append(request.data) 171 | 172 | for d in data: 173 | qtokens = re.split(r'(?:&|&amp;)', d) 174 | for qt in qtokens: 175 | tokens.extend(qt.split("=", 1)) 176 | 177 | # print tokens 178 | return tokens 179 | 180 | # UNUSED 181 | def is_similar(self, other): 182 | # is equal .. so not similar 183 | if self == other: return False 184 | 185 | ot = self.tokenize_request(other) 186 | st = self.tokenize_request(self) 187 | 188 | if len(ot) != len(st): return False 189 | diff = 0 190 | for i in range(0, len(st)): 191 | if st[i] != ot[i]: diff += 1 192 | 193 | if diff > 1: return False 194 | 195 | return True 196 | 197 | def __eq__(self, other): 198 | if other is None: return False 199 | data = self.data 200 | odata = other.data 201 | if self.method == "POST": 202 | data = remove_tokens(data) 203 | odata = remove_tokens(odata) 204 | 205 | return (self.method, self.url, self.type, self.http_auth, data) == ( 206 | other.method, other.url, other.type, other.http_auth, odata) 207 | 208 | def __repr__(self): 209 | return "DEBUG " + self.__str__() 210 | 211 | def __str__(self): 212 | return "%s %s %s %s" % (self.type, self.method, self.get_full_url(), self.data) 213 | -------------------------------------------------------------------------------- /core/lib/http_get.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version.
11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | import base64 16 | import cookielib 17 | import ssl 18 | import time 19 | import urllib2 20 | 21 | import core.lib.thirdparty.pysocks.socks as socks 22 | from core.constants import * 23 | from core.crawl.lib.urlfinder import UrlFinder 24 | from core.lib.cookie import Cookie 25 | from core.lib.exception import RedirectException, NotHtmlException 26 | from core.lib.request import Request 27 | from core.lib.thirdparty.pysocks.sockshandler import SocksiPyHandler 28 | 29 | 30 | class HttpGet: 31 | def __init__(self, request, timeout, retries=None, user_agent=None, proxy=None): 32 | self.request = request 33 | self.timeout = timeout 34 | self.retries = retries if retries else 1 35 | self.proxy = proxy 36 | self.retries_interval = 0.5 37 | self.user_agent = user_agent 38 | 39 | def urllib2_opener(self, request, jar_response, follow_redirect=None): 40 | url = request.url 41 | headers = [] 42 | 43 | class RedirectHandler(urllib2.HTTPRedirectHandler): 44 | def http_error_302(self, req, fp, code, msg, headers): 45 | raise RedirectException(headers['Location']) 46 | 47 | http_error_301 = http_error_303 = http_error_307 = http_error_302 48 | 49 | try: 50 | handlers = [urllib2.HTTPCookieProcessor(jar_response)] 51 | 52 | # SSLContext is available from python 2.7.9 53 | if hasattr(ssl, "SSLContext"): 54 | handlers.append(urllib2.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_SSLv23))) 55 | 56 | if not follow_redirect: 57 | handlers.append(RedirectHandler) 58 | 59 | if self.proxy: 60 | if self.proxy['proto'] == "socks5": 61 | # dns queries WONT go thru self.proxy .. consider "monkey patching"... 62 | socksh = SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, self.proxy['host'], int(self.proxy['port'])) 63 | handlers.append(socksh) 64 | elif self.proxy['proto'] == "http": 65 | proxy_string = "http://%s:%s" % (self.proxy['host'], self.proxy['port']) 66 | httproxy = urllib2.ProxyHandler({'http': proxy_string, 'https': proxy_string}) 67 | handlers.append(httproxy) 68 | 69 | if self.user_agent: 70 | headers.append(('User-agent', self.user_agent)) 71 | 72 | if request.http_auth: 73 | auths = base64.b64encode(request.http_auth) 74 | headers.append(("Authorization", "Basic %s" % auths)) 75 | 76 | if request.referer: 77 | headers.append(("Referer", request.referer)) 78 | 79 | opener = urllib2.build_opener(*handlers) 80 | opener.addheaders = headers 81 | 82 | return opener 83 | 84 | 85 | except RedirectException as e: 86 | raise 87 | except Exception as e: 88 | print "\n--->" + url + " " + str(e) 89 | raise 90 | 91 | def get_requests(self): # Shared.options['process_timeout'] 92 | 93 | if self.request.method == "POST": 94 | raise Exception("POST method with urllib is not supported yet") 95 | 96 | # parent = self.request.parent.url if self.request.parent else "" 97 | 98 | self.retries_interval = 0.5 99 | 100 | jar_response = cookielib.LWPCookieJar() 101 | jar_request = cookielib.LWPCookieJar() 102 | 103 | html = "" 104 | set_cookie = [] 105 | 106 | requests = [] 107 | 108 | while True: 109 | try: 110 | # Shared.th_lock.acquire() 111 | 112 | for cookie in self.request.cookies: 113 | jar_request.set_cookie(cookie.get_cookielib_cookie()) 114 | 115 | # Shared.th_lock.release() 116 | 117 | opener = self.urllib2_opener(self.request, jar_response) 118 | req = urllib2.Request(url=self.request.url) 119 | jar_request.add_cookie_header(req) 120 | 121 | res = opener.open(req, None, self.timeout) 122 | 123 | for cookie in jar_response: 124 | 
set_cookie.append(Cookie(cookie.__dict__, self.request.url)) 125 | 126 | ctype = res.info()['Content-Type'] 127 | if ctype is not None: 128 | if ctype.lower().split(";")[0] != "text/html": 129 | opener.close() 130 | raise NotHtmlException(ERROR_CONTENTTYPE) 131 | 132 | html = res.read() 133 | opener.close() 134 | 135 | if html: 136 | finder = UrlFinder(html) 137 | try: 138 | urls = finder.get_urls() 139 | except Exception as e: 140 | raise 141 | 142 | for url in urls: 143 | # @TODO handle FORMS 144 | requests.append(Request(REQTYPE_LINK, "GET", url, parent=self.request, set_cookie=set_cookie, 145 | parent_db_id=self.request.db_id)) 146 | 147 | break 148 | 149 | except RedirectException as e: 150 | set_cookie = [] 151 | for cookie in jar_response: 152 | set_cookie.append(Cookie(cookie.__dict__, self.request.url)) 153 | 154 | r = Request(REQTYPE_REDIRECT, "GET", str(e), parent=self.request, set_cookie=set_cookie, 155 | parent_db_id=self.request.db_id) 156 | requests.append(r) 157 | break 158 | except NotHtmlException: 159 | raise 160 | except Exception as e: 161 | self.retries -= 1 162 | if self.retries == 0: raise 163 | time.sleep(self.retries_interval) 164 | 165 | return requests 166 | 167 | def get_file(self): # Shared.options['process_timeout'] 168 | 169 | if self.request.method == "POST": 170 | raise Exception("get_file: POST method with urllib is not supported yet") 171 | 172 | jar_request = cookielib.LWPCookieJar() 173 | 174 | cont = "" 175 | while True: 176 | try: 177 | 178 | for cookie in self.request.cookies: 179 | jar_request.set_cookie(cookie.get_cookielib_cookie()) 180 | 181 | opener = self.urllib2_opener(self.request, None, True) 182 | req = urllib2.Request(url=self.request.url) 183 | jar_request.add_cookie_header(req) 184 | res = opener.open(req, None, self.timeout) 185 | 186 | cont = res.read() 187 | opener.close() 188 | 189 | break 190 | 191 | except Exception as e: 192 | self.retries -= 1 193 | if self.retries == 0: raise 194 | time.sleep(self.retries_interval) 195 | 196 | return cont 197 | -------------------------------------------------------------------------------- /core/crawl/probe/src/utils.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | 'use strict'; 3 | 4 | const url = require('url'); 5 | 6 | const ArgsParse = require('../node_modules/argparse').ArgumentParser; 7 | 8 | 9 | exports.getOptionsFromArgs = function() { 10 | 11 | let argumentParser = new ArgsParse(); 12 | 13 | _getArguments(argumentParser); 14 | 15 | let args = argumentParser.parseArgs(); 16 | 17 | return _getOptions(args); 18 | }; 19 | 20 | function _getArguments(argumentParser) { 21 | 22 | let args; 23 | 24 | argumentParser.addArgument( 25 | '-A', 26 | { 27 | help: 'user agent', 28 | dest: 'userAgent', 29 | defaultValue: 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36', 30 | }, 31 | ); 32 | argumentParser.addArgument( 33 | '-R', 34 | { 35 | help: 'random string used to generate random values - the same random string will generate the same random values', 36 | dest: 'random', 37 | defaultValue: 'IsHOulDb34RaNd0MsTR1ngbUt1mN0t', 38 | }, 39 | ); 40 | argumentParser.addArgument( 41 | '-f', 42 | { 43 | help: 'do NOT fill values in forms', 44 | dest: 'fillValues', 45 | defaultValue: true, 46 | nargs: 0, 47 | action: 'storeFalse', 48 | }, 49 | ); 50 | argumentParser.addArgument( 51 | '-t', 52 | { 53 | help: 'do NOT trigger events (onload only)', 54 | dest: 'triggerEvents', 55 | 
defaultValue: true, 56 | nargs: 0, 57 | action: 'storeFalse', 58 | }, 59 | ); 60 | argumentParser.addArgument( 61 | '-X', 62 | { 63 | help: 'comma separated list of excluded urls', 64 | dest: 'excludedUrls', 65 | defaultValue: '', 66 | }, 67 | ); 68 | argumentParser.addArgument( 69 | '-O', 70 | { 71 | help: 'do NOT override timeout functions', 72 | dest: 'overrideTimeoutFunctions', 73 | defaultValue: true, 74 | nargs: 0, 75 | action: 'storeFalse', 76 | }, 77 | ); 78 | argumentParser.addArgument( 79 | '-c', 80 | { 81 | help: 'set cookies (json format)', 82 | dest: 'cookies', 83 | defaultValue: '', 84 | }, 85 | ); 86 | argumentParser.addArgument( 87 | '-r', 88 | { 89 | help: 'url referer', 90 | dest: 'referer', 91 | defaultValue: '', 92 | }, 93 | ); 94 | 95 | argumentParser.addArgument( 96 | '-p', 97 | { 98 | help: 'http auth (user:pass)', 99 | dest: 'httpAuth', 100 | defaultValue: '', 101 | }, 102 | ); 103 | argumentParser.addArgument( 104 | '-P', 105 | { 106 | help: 'load page with POST', 107 | dest: 'sendPOST', 108 | defaultValue: false, 109 | nargs: 0, 110 | action: 'storeTrue', 111 | }, 112 | ); 113 | argumentParser.addArgument( 114 | '-D', 115 | { 116 | help: 'POST data', 117 | dest: 'POSTData', 118 | }, 119 | ); 120 | argumentParser.addArgument( 121 | '--proxy', 122 | { 123 | help: 'Proxy address in format "proxy-scheme://proxy-ip:proxy-port"', 124 | dest: 'proxyAddress', 125 | defaultValue: '', 126 | }, 127 | ); 128 | 129 | argumentParser.addArgument( 130 | '-v', 131 | { 132 | help: 'verbosity level', 133 | dest: 'verbosity', 134 | action: 'count', 135 | defaultValue: 0, 136 | }, 137 | ); 138 | 139 | argumentParser.addArgument( 140 | '--debug', 141 | { 142 | help: 'activate debug mode', 143 | dest: 'debug', 144 | defaultValue: false, 145 | nargs: 0, 146 | action: 'storeTrue', 147 | }, 148 | ); 149 | 150 | argumentParser.addArgument( 151 | 'startUrl', 152 | { 153 | help: 'starting url', 154 | }, 155 | ); 156 | 157 | args = argumentParser.parseArgs(); 158 | 159 | if (!args.startUrl.startsWith('http')) { 160 | argumentParser.error('invalid starting url: "' + args.startUrl + '"'); 161 | } 162 | 163 | return args; 164 | } 165 | 166 | function _getOptions(args) { 167 | let options = {}; 168 | 169 | options.userAgent = args.userAgent; 170 | options.random = args.random; 171 | options.fillValues = args.fillValues; 172 | options.triggerEvents = args.triggerEvents; 173 | options.excludedUrls = args.excludedUrls !== '' ? 
args.excludedUrls.split(',') : []; 174 | options.overrideTimeoutFunctions = args.overrideTimeoutFunctions; 175 | options.verbosity = args.verbosity; 176 | options.debug = args.debug; 177 | 178 | options.inputValues = _generateRandomValues(options.random); 179 | 180 | if (args.cookies !== '') { 181 | options.cookies = JSON.parse(args.cookies); 182 | } else { 183 | options.cookies = []; 184 | } 185 | 186 | if (args.referer !== '') { 187 | options.referer = args.referer; 188 | } 189 | 190 | if (args.httpAuth !== '') { 191 | let a = args.httpAuth.split(':'); 192 | options.httpAuth = { 193 | username: a[0], 194 | password: a[1], 195 | }; 196 | } 197 | 198 | if (args.sendPOST) { 199 | options.sendPOST = args.sendPOST; 200 | options.POSTData = args.POSTData; 201 | } 202 | 203 | if (args.proxyAddress !== '') { 204 | options.proxyAddress = args.proxyAddress; 205 | } 206 | 207 | options.startUrl = url.parse(args.startUrl); 208 | 209 | return options; 210 | 211 | } 212 | 213 | 214 | /** 215 | * generate a static map of random values using a "static" seed for input fields 216 | * the same seed generates the same values 217 | * generated values MUST be the same for all runs of the probe otherwise the same form will look different 218 | * for example if a page sends a form to itself with input=random1, 219 | * the same form on the same page (after first post) will become input=random2 220 | * => form.data1 != form.data2 => form.data2 is considered a different request and it'll be crawled. 221 | * this process will lead to an infinite loop! 222 | * @param seed String 223 | * @return {{}} 224 | * @private 225 | */ 226 | function _generateRandomValues(seed) { 227 | let values = {}, 228 | letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', 229 | numbers = '0123456789', 230 | symbols = '!#&^;.,?%$*', 231 | months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], 232 | years = ['1982', '1989', '1990', '1994', '1995', '1996'], 233 | names = ['james', 'john', 'robert', 'michael', 'william', 'david', 'richard', 'charles', 'joseph', 'thomas', 'christopher', 'daniel', 'paul', 'mark', 'donald', 'george', 'kenneth'], 234 | surnames = ['anderson', 'thomas', 'jackson', 'white', 'harris', 'martin', 'thompson', 'garcia', 'martinez', 'robinson', 'clark', 'rodriguez', 'lewis', 'lee', 'walker', 'hall'], 235 | domains = ['.com', '.org', '.net', '.it', '.tv', '.de', '.fr']; 236 | 237 | let randoms = [], 238 | randoms_i = 0; 239 | 240 | for (let a = 0; a < seed.length; a++) { 241 | randoms.push(seed[a].charCodeAt(0)); 242 | } 243 | 244 | const rand = function(max) { 245 | let i = randoms[randoms_i] % max; 246 | randoms_i = (randoms_i + 1) % randoms.length; 247 | return i; 248 | }; 249 | 250 | const randomizeArray = function(arr, len) { 251 | let r, ret = ''; 252 | for (let a = 0; a < len; a++) { 253 | r = rand(arr.length - 1); 254 | ret += arr[r]; 255 | } 256 | return ret; 257 | }; 258 | 259 | let generators = { 260 | string: function() { 261 | return randomizeArray(letters, 8); 262 | }, 263 | number: function() { 264 | return randomizeArray(numbers, 3); 265 | }, 266 | month: function() { 267 | return randomizeArray(months, 1); 268 | }, 269 | year: function() { 270 | return randomizeArray(years, 1); 271 | }, 272 | date: function() { 273 | return generators.year() + '-' + generators.month() + '-' + generators.month(); 274 | }, 275 | color: function() { 276 | return '#' + randomizeArray(numbers, 6); 277 | }, 278 | week: function() { 279 | return generators.year() + '-W' +
randomizeArray(months.slice(0, 6), 1); 280 | }, 281 | time: function() { 282 | return generators.month() + ':' + generators.month(); 283 | }, 284 | datetimeLocal: function() { 285 | return generators.date() + 'T' + generators.time(); 286 | }, 287 | domain: function() { 288 | return randomizeArray(letters, 12) 289 | .toLowerCase() + randomizeArray(domains, 1); 290 | }, 291 | email: function() { 292 | return randomizeArray(names, 1) + '.' + generators.surname() + '@' + generators.domain(); 293 | }, 294 | url: function() { 295 | return 'http://www.' + generators.domain(); 296 | }, 297 | humandate: function() { 298 | return generators.month() + '/' + generators.month() + '/' + generators.year(); 299 | }, 300 | password: function() { 301 | return randomizeArray(letters, 3) + randomizeArray(symbols, 1) + randomizeArray(letters, 2) + randomizeArray(numbers, 3) + randomizeArray(symbols, 2); 302 | }, 303 | surname: function() { 304 | return randomizeArray(surnames, 1); 305 | }, 306 | firstname: function() { 307 | return randomizeArray(names, 1); 308 | }, 309 | tel: function() { 310 | return '+' + randomizeArray(numbers, 1) + ' ' + randomizeArray(numbers, 10); 311 | }, 312 | }; 313 | 314 | for (let type in generators) { 315 | values[type] = generators[type](); 316 | } 317 | 318 | return values; 319 | 320 | } 321 | 322 | 323 | })(); 324 | -------------------------------------------------------------------------------- /core/crawl/probe/src/page-handler.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | 'use strict'; 3 | 4 | const EventEmitter = require('events'); 5 | 6 | const logger = require('../logger').debug; 7 | const probe = require('./probe'); 8 | 9 | /** 10 | * 11 | * @param {Puppeteer} puppeteer 12 | * @param {String} proxy - in format: `hostname:port` 13 | * @param {boolean} debug - activate debug mode 14 | * @return {Promise.|*} 15 | */ 16 | exports.getBrowserAndPage = function(puppeteer, proxy, debug) { 17 | let browserArgs = [ 18 | '--no-sandbox', // in docker 19 | '--disable-setuid-sandbox', // in docker 20 | '--disable-gpu', // headless 21 | '--hide-scrollbars', // headless 22 | '--mute-audio', // headless 23 | '--ignore-certificate-errors', // no security 24 | '--ignore-certificate-errors-spki-list ', // no security 25 | '--ssl-version-max=tls1.3', // no security 26 | '--ssl-version-min=tls1', // no security 27 | '--disable-web-security', // no security 28 | '--allow-running-insecure-content', // no security 29 | `--load-extension=${__dirname}/../chrome_extension/`, // load extension 30 | `--disable-extensions-except=${__dirname}/../chrome_extension/`, // load extension 31 | ]; 32 | 33 | 34 | if (proxy) { 35 | browserArgs.push(`--proxy-server=${proxy}`); 36 | } 37 | 38 | let launchParams = { 39 | headless: false, 40 | ignoreHTTPSErrors: true, 41 | args: browserArgs, 42 | }; 43 | 44 | if (debug) { 45 | launchParams['dumpio'] = true; 46 | launchParams['devtools'] = true; 47 | } 48 | 49 | return puppeteer.launch(launchParams) 50 | .then(createdBrowser => { 51 | return createdBrowser.newPage() 52 | .then(createdPage => { 53 | return [createdBrowser, createdPage]; 54 | }); 55 | }); 56 | }; 57 | 58 | class Handler extends EventEmitter { 59 | /** 60 | * @constructor 61 | */ 62 | constructor(page, constants, options) { 63 | super(); 64 | this._page = page; 65 | this._constants = constants; 66 | this._options = options; 67 | this._lastRedirectResponse = undefined; 68 | this._reformatFirstRequest = (options.referer || options.sendPOST); 69 
| } 70 | 71 | initialize() { 72 | this._page.on('request', interceptedRequest => { 73 | if (this._options.verbosity >= 2) { 74 | logger.debug(`intercepted request: ${interceptedRequest.resourceType()} ${interceptedRequest.url()}`); 75 | } 76 | 77 | // block image loading 78 | if (interceptedRequest.resourceType() === 'image') { 79 | if (this._options.verbosity >= 2) { 80 | logger.debug(`abort request: ${interceptedRequest.resourceType()} ${interceptedRequest.url()}`); 81 | } 82 | interceptedRequest.abort(); 83 | 84 | // Block redirect 85 | // Since no option exist in puppeteer, this is the workaround proposed here: 86 | // https://github.com/GoogleChrome/puppeteer/issues/1132#issuecomment-339420642 87 | } else if (this._lastRedirectResponse && this._lastRedirectResponse.headers().location === interceptedRequest.url()) { 88 | this.getCookies() 89 | .then(cookies => { 90 | 91 | let cookiesResult = ['cookies', cookies], 92 | status = {'status': 'ok', 'redirect': interceptedRequest.url()}; 93 | this.emit(Handler.Events.ProbeResult, cookiesResult); 94 | this.emit(Handler.Events.Finished, 0, status); 95 | 96 | if (this._options.verbosity >= 3) { 97 | logger.debug(`abort request: ${interceptedRequest.resourceType()} ${interceptedRequest.url()}`); 98 | } 99 | 100 | interceptedRequest.abort(); 101 | }); 102 | // Set the first request as POST or/and Headers 103 | // Since the feature is missing, handling it here. 104 | // https://github.com/GoogleChrome/puppeteer/issues/1062 105 | } else if (this._reformatFirstRequest) { 106 | 107 | let overrides = {headers: interceptedRequest.headers()}; 108 | 109 | if (this._options.sendPOST) { 110 | overrides.method = 'POST'; 111 | overrides.postData = this._options.POSTData || undefined; 112 | } 113 | 114 | if (this._options.referer) { 115 | overrides.headers['Referer'] = this._options.referer; 116 | } 117 | 118 | if (this._options.verbosity >= 2) { 119 | logger.debug(`accept request with overrides: ${interceptedRequest.resourceType()} ${interceptedRequest.url()}`); 120 | } 121 | 122 | interceptedRequest.continue(overrides) 123 | .then(() => { 124 | this._reformatFirstRequest = false; 125 | }); 126 | 127 | } else { 128 | 129 | if (this._options.verbosity >= 2) { 130 | logger.debug(`accept request: ${interceptedRequest.resourceType()} ${interceptedRequest.url()}`); 131 | } 132 | interceptedRequest.continue(); 133 | } 134 | 135 | }); 136 | 137 | this._page.on('response', response => { 138 | if (_isRedirect(response)) { 139 | this._lastRedirectResponse = response; 140 | } 141 | }); 142 | 143 | this._page.on('dialog', dialog => { 144 | if (this._options.verbosity >= 3) { 145 | logger.debug(`Page dialog, type "${dialog.type()}": "${dialog.message()}"`); 146 | } 147 | dialog.accept(); 148 | }); 149 | 150 | this._page.on('error', error => { 151 | if (this._options.verbosity >= 1) { 152 | logger.error(`Page crash: "${error.code}", "${error.message()}"`); 153 | } 154 | let status = {'status': 'error', 'code': 'pageCrash', 'message': `Page crash with: "${error.code}", "${error.message()}"`}; 155 | this.emit(Handler.Events.Finished, 1, status); 156 | }); 157 | 158 | this._page.on('framenavigated', frameTo => { 159 | if (this._options.verbosity >= 2) { 160 | logger.debug(`framenavigated to ${frameTo.url()}`); 161 | } 162 | }); 163 | 164 | this._page.on('console', consoleMessage => { 165 | if (this._options.verbosity >= 1) { 166 | if (['error', 'warning', 'trace'].includes(consoleMessage.type())) { 167 | logger.warn(`Page console error message : "${consoleMessage.text()}"`); 
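// (the branches below map console message type to a minimum verbosity level:
// error/warning/trace -> 1, info -> 2, log -> 3, any other type -> 4)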
168 | } else if (consoleMessage.type() === 'info' && this._options.verbosity >= 2) { 169 | logger.info(`Page console message : ${consoleMessage.text()}`); 170 | } else if (consoleMessage.type() === 'log' && this._options.verbosity >= 3) { 171 | logger.debug(`Page console message : "${consoleMessage.text()}"`); 172 | } else if (this._options.verbosity >= 4) { 173 | logger.debug(`Page console message, type ${consoleMessage.type()} : "${consoleMessage.text()}"`); 174 | } 175 | } 176 | }); 177 | 178 | this._page.on('frameattached', frameTo => { 179 | if (this._options.verbosity >= 2) { 180 | logger.debug(`frameattached to ${frameTo.url()}`); 181 | } 182 | }); 183 | 184 | this._page.on('requestfailed', failedRequest => { 185 | if (this._options.verbosity >= 2) { 186 | logger.debug(`requestfailed: ${failedRequest.url()}`); 187 | } 188 | }); 189 | 190 | this._page.on('requestfinished', finishedRequest => { 191 | if (this._options.verbosity >= 2) { 192 | logger.debug(`requestfinished: ${finishedRequest.response() 193 | .status()}, ${finishedRequest.method} ${finishedRequest.url()}`); 194 | } 195 | }); 196 | 197 | this._page.on('load', () => { 198 | if (this._options.verbosity >= 1) { 199 | logger.info('load done'); 200 | } 201 | }); 202 | 203 | 204 | // set function to return value from probe 205 | this._page.exposeFunction('__PROBE_FN_RETURN_REQUEST__', (request) => { 206 | if (this._options.verbosity >= 2) { 207 | logger.info(`Found request: ${JSON.stringify(request[1])}`); 208 | } 209 | this.emit(Handler.Events.ProbeResult, request); 210 | }); 211 | 212 | // set function to request end from probe 213 | this._page.exposeFunction('__PROBE_FN_REQUEST_END__', () => { 214 | if (this._options.verbosity >= 1) { 215 | logger.info('Probe finished'); 216 | } 217 | let status = {'status': 'ok'}; 218 | this.emit(Handler.Events.Finished, 0, status); 219 | }); 220 | 221 | return Promise.all([ 222 | this._page.setUserAgent(this._options.userAgent), 223 | this._page.setCookie(...this._options.cookies), 224 | this._page.setViewport(this._constants.viewport), 225 | this._page.setRequestInterception(true), 226 | this._page.authenticate(this._options.httpAuth), 227 | ]) 228 | .then(() => { 229 | this._setProbe(); 230 | return this._page; 231 | }); 232 | } 233 | 234 | _setProbe() { 235 | // on every new document, initializing the probe into the page context 236 | this._page.evaluateOnNewDocument(probe.setProbe, ...[this._options, this._constants]); 237 | } 238 | 239 | startProbe() { 240 | this._page.evaluate(() => { 241 | window.__PROBE__.startAnalysis(); 242 | }); 243 | } 244 | 245 | /** 246 | * @return {Promise|Cookie} 247 | */ 248 | getCookies() { 249 | return this._page.cookies(); 250 | } 251 | } 252 | 253 | Handler.Events = { 254 | Finished: 'finished', 255 | ProbeResult: 'probeResult', 256 | }; 257 | 258 | function _isRedirect(response) { 259 | return [301, 302, 303, 307, 308].includes(response.status()) && response.request() 260 | .resourceType() === 'document'; 261 | } 262 | 263 | exports.Handler = Handler; 264 | 265 | })(); 266 | -------------------------------------------------------------------------------- /core/util/utilities/htmlreport/style.css: -------------------------------------------------------------------------------- 1 | body{ 2 | font-family:Helvetica; 3 | margin:0; 4 | padding:0; 5 | overflow:auto; 6 | } 7 | html{ 8 | height:100%; 9 | } 10 | a{ 11 | text-decoration:none; 12 | outline: 0; 13 | color:#000; 14 | } 15 | a:active{ 16 | color:#000; 17 | } 18 | hr{ 19 | border: 0; 20 | height: 
1px; 21 | background-image: linear-gradient(to right, #000000 1%,#ffffff 100%); 22 | } 23 | label{ 24 | cursor:pointer; 25 | } 26 | /*.row{ 27 | 28 | 29 | }*/ 30 | 31 | 32 | .accordion{ 33 | 34 | } 35 | 36 | .accordion-closed{ 37 | height:0px; 38 | overflow:hidden; 39 | } 40 | 41 | 42 | /*.accordion-open{ 43 | overflow:visible; 44 | }*/ 45 | 46 | 47 | 48 | .parent-url{ 49 | margin:0; 50 | color:gray; 51 | font-size:11px; 52 | } 53 | 54 | .url-post-data, .result-post-data{ 55 | margin:0; 56 | color:#5A5A60; 57 | font-size:12px; 58 | } 59 | 60 | .mainAccordion{ 61 | margin:0px 0px 0px 20px; 62 | border-left:1px dotted #047288; 63 | padding-left:6px; 64 | } 65 | 66 | .result-accordion-hdr{ 67 | margin:3px 3px 3px -3px; 68 | font-weight:bold; 69 | cursor:pointer; 70 | } 71 | 72 | 73 | .result-accordion-hdr.hdr-accordion-open:before{ 74 | line-height:24px; 75 | color:gray; 76 | font-size:10px; 77 | content: "▼ "; 78 | } 79 | 80 | .result-accordion-hdr.hdr-accordion-closed:before{ 81 | line-height:24px; 82 | color:gray; 83 | font-size:10px; 84 | content: "► "; 85 | } 86 | 87 | /* must have margin and padding = 0 to be collapsed on scroll */ 88 | #collapse_top{ 89 | position:relative; 90 | overflow: hidden; 91 | padding:0px; 92 | margin:0px; 93 | } 94 | 95 | #top_header{ 96 | margin:10px 0 0 10px; 97 | } 98 | 99 | #title{ 100 | margin:0 0 10px 0; 101 | padding:0; 102 | font-weight:bold; 103 | } 104 | 105 | #top{ 106 | position:fixed; 107 | background-color:#fff; 108 | z-index:777; 109 | top:0; 110 | left:0; 111 | margin:0; 112 | min-width:900px; 113 | width:100%; 114 | } 115 | #top hr{ 116 | max-width:1200px; 117 | margin:0; 118 | } 119 | 120 | #filtersbox{ 121 | margin:10px 0 0 10px; 122 | } 123 | 124 | .hidden{ display: none !important} 125 | 126 | .icon { 127 | display: inline-block; 128 | 129 | background-color:#047288; 130 | padding:4px; 131 | padding-top:2px; 132 | padding-bottom:2px; 133 | border-radius:2px; 134 | color:#fff; 135 | margin-right:4px; 136 | margin-left:1px; 137 | cursor:default; 138 | font-size:10px; 139 | } 140 | 141 | section{ 142 | margin:0px 0px 10px 10px; 143 | padding:0; 144 | white-space: nowrap; 145 | } 146 | 147 | section.marked{ 148 | border-left:4px solid #00F500; 149 | margin-left:6px; 150 | } 151 | 152 | section.marked .mark-button:before{ 153 | content:"un"; 154 | } 155 | 156 | .icon.icon-filtered{ 157 | opacity:0.5; 158 | } 159 | .open-new-win{ 160 | display:inline-block; 161 | background-image: 
url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAKQWlDQ1BJQ0MgUHJvZmlsZQAASA2dlndUU9kWh8+9N73QEiIgJfQaegkg0jtIFQRRiUmAUAKGhCZ2RAVGFBEpVmRUwAFHhyJjRRQLg4Ji1wnyEFDGwVFEReXdjGsJ7601896a/cdZ39nnt9fZZ+9917oAUPyCBMJ0WAGANKFYFO7rwVwSE8vE9wIYEAEOWAHA4WZmBEf4RALU/L09mZmoSMaz9u4ugGS72yy/UCZz1v9/kSI3QyQGAApF1TY8fiYX5QKUU7PFGTL/BMr0lSkyhjEyFqEJoqwi48SvbPan5iu7yZiXJuShGlnOGbw0noy7UN6aJeGjjAShXJgl4GejfAdlvVRJmgDl9yjT0/icTAAwFJlfzOcmoWyJMkUUGe6J8gIACJTEObxyDov5OWieAHimZ+SKBIlJYqYR15hp5ejIZvrxs1P5YjErlMNN4Yh4TM/0tAyOMBeAr2+WRQElWW2ZaJHtrRzt7VnW5mj5v9nfHn5T/T3IevtV8Sbsz55BjJ5Z32zsrC+9FgD2JFqbHbO+lVUAtG0GQOXhrE/vIADyBQC03pzzHoZsXpLE4gwnC4vs7GxzAZ9rLivoN/ufgm/Kv4Y595nL7vtWO6YXP4EjSRUzZUXlpqemS0TMzAwOl89k/fcQ/+PAOWnNycMsnJ/AF/GF6FVR6JQJhIlou4U8gViQLmQKhH/V4X8YNicHGX6daxRodV8AfYU5ULhJB8hvPQBDIwMkbj96An3rWxAxCsi+vGitka9zjzJ6/uf6Hwtcim7hTEEiU+b2DI9kciWiLBmj34RswQISkAd0oAo0gS4wAixgDRyAM3AD3iAAhIBIEAOWAy5IAmlABLJBPtgACkEx2AF2g2pwANSBetAEToI2cAZcBFfADXALDIBHQAqGwUswAd6BaQiC8BAVokGqkBakD5lC1hAbWgh5Q0FQOBQDxUOJkBCSQPnQJqgYKoOqoUNQPfQjdBq6CF2D+qAH0CA0Bv0BfYQRmALTYQ3YALaA2bA7HAhHwsvgRHgVnAcXwNvhSrgWPg63whfhG/AALIVfwpMIQMgIA9FGWAgb8URCkFgkAREha5EipAKpRZqQDqQbuY1IkXHkAwaHoWGYGBbGGeOHWYzhYlZh1mJKMNWYY5hWTBfmNmYQM4H5gqVi1bGmWCesP3YJNhGbjS3EVmCPYFuwl7ED2GHsOxwOx8AZ4hxwfrgYXDJuNa4Etw/XjLuA68MN4SbxeLwq3hTvgg/Bc/BifCG+Cn8cfx7fjx/GvyeQCVoEa4IPIZYgJGwkVBAaCOcI/YQRwjRRgahPdCKGEHnEXGIpsY7YQbxJHCZOkxRJhiQXUiQpmbSBVElqIl0mPSa9IZPJOmRHchhZQF5PriSfIF8lD5I/UJQoJhRPShxFQtlOOUq5QHlAeUOlUg2obtRYqpi6nVpPvUR9Sn0vR5Mzl/OX48mtk6uRa5Xrl3slT5TXl3eXXy6fJ18hf0r+pvy4AlHBQMFTgaOwVqFG4bTCPYVJRZqilWKIYppiiWKD4jXFUSW8koGStxJPqUDpsNIlpSEaQtOledK4tE20Otpl2jAdRzek+9OT6cX0H+i99AllJWVb5SjlHOUa5bPKUgbCMGD4M1IZpYyTjLuMj/M05rnP48/bNq9pXv+8KZX5Km4qfJUilWaVAZWPqkxVb9UU1Z2qbapP1DBqJmphatlq+9Uuq43Pp893ns+dXzT/5PyH6rC6iXq4+mr1w+o96pMamhq+GhkaVRqXNMY1GZpumsma5ZrnNMe0aFoLtQRa5VrntV4wlZnuzFRmJbOLOaGtru2nLdE+pN2rPa1jqLNYZ6NOs84TXZIuWzdBt1y3U3dCT0svWC9fr1HvoT5Rn62fpL9Hv1t/ysDQINpgi0GbwaihiqG/YZ5ho+FjI6qRq9Eqo1qjO8Y4Y7ZxivE+41smsImdSZJJjclNU9jU3lRgus+0zwxr5mgmNKs1u8eisNxZWaxG1qA5wzzIfKN5m/krCz2LWIudFt0WXyztLFMt6ywfWSlZBVhttOqw+sPaxJprXWN9x4Zq42Ozzqbd5rWtqS3fdr/tfTuaXbDdFrtOu8/2DvYi+yb7MQc9h3iHvQ732HR2KLuEfdUR6+jhuM7xjOMHJ3snsdNJp9+dWc4pzg3OowsMF/AX1C0YctFx4bgccpEuZC6MX3hwodRV25XjWuv6zE3Xjed2xG3E3dg92f24+ysPSw+RR4vHlKeT5xrPC16Il69XkVevt5L3Yu9q76c+Oj6JPo0+E752vqt9L/hh/QL9dvrd89fw5/rX+08EOASsCegKpARGBFYHPgsyCRIFdQTDwQHBu4IfL9JfJFzUFgJC/EN2hTwJNQxdFfpzGC4sNKwm7Hm4VXh+eHcELWJFREPEu0iPyNLIR4uNFksWd0bJR8VF1UdNRXtFl0VLl1gsWbPkRoxajCCmPRYfGxV7JHZyqffS3UuH4+ziCuPuLjNclrPs2nK15anLz66QX8FZcSoeGx8d3xD/iRPCqeVMrvRfuXflBNeTu4f7kufGK+eN8V34ZfyRBJeEsoTRRJfEXYljSa5JFUnjAk9BteB1sl/ygeSplJCUoykzqdGpzWmEtPi000IlYYqwK10zPSe9L8M0ozBDuspp1e5VE6JA0ZFMKHNZZruYjv5M9UiMJJslg1kLs2qy3mdHZZ/KUcwR5vTkmuRuyx3J88n7fjVmNXd1Z752/ob8wTXuaw6thdauXNu5Tnddwbrh9b7rj20gbUjZ8MtGy41lG99uit7UUaBRsL5gaLPv5sZCuUJR4b0tzlsObMVsFWzt3WazrWrblyJe0fViy+KK4k8l3JLr31l9V/ndzPaE7b2l9qX7d+B2CHfc3em681iZYlle2dCu4F2t5czyovK3u1fsvlZhW3FgD2mPZI+0MqiyvUqvakfVp+qk6oEaj5rmvep7t+2d2sfb17/fbX/TAY0DxQc+HhQcvH/I91BrrUFtxWHc4azDz+ui6rq/Z39ff0TtSPGRz0eFR6XHwo911TvU1zeoN5Q2wo2SxrHjccdv/eD1Q3sTq+lQM6O5+AQ4ITnx4sf4H++eDDzZeYp9qukn/Z/2ttBailqh1tzWibakNml7THvf6YDTnR3OHS0/m/989Iz2mZqzymdLz5HOFZybOZ93fvJCxoXxi4kXhzpXdD66tOTSna6wrt7LgZevXvG5cqnbvfv8VZerZ645XTt9nX297Yb9jdYeu56WX+x+aem172296XCz/ZbjrY6+BX3n+l37L972un3ljv+dGwOLBvruLr57/17cPel93v3RB6kPXj/Mejj9aP1j7OOiJwpPKp6qP6391fjXZqm99Oyg12DPs4hnj4a4Qy//lfmvT8MFz6nPK0a0RupHrUfPjPmM3Xqx9MXwy4yX0+OFvyn+tveV0auffnf7vWdiycTwa9HrmT9K3qi+OfrW9m3nZOjk03dp76anit6rvj/2gf2h+2P0x5Hp7E/4T
5WfjT93fAn88ngmbWbm3/eE8/syOll+AAAACXBIWXMAAAsTAAALEwEAmpwYAAABh0lEQVQ4EY2Svy4EURTGZ+6dRbIRRKFR0NoHEI3EU3gExYbEJhIKBQ1WIzyGKLReQisKofQAbIKZ6/ft3DNmk5nESb4595zv/LvnTpokSQo8aJIiOqV7zrlt9A8YAV8UxV3GIUQnqlE6eAvv/Tr6OITwnqbpkiIp2FX3OcgtdH0KFe3kef6IfgYOyBdIOqTAGeeEYieqskOB0IQsy64UiMzoQ2xfcegheAWXcg5w6l49sAxWwUrUXbS6K24vJp/L5jwCNyL2OXzh0z4ahZjdmHxhAFj6TLhhE3xDzEdShbQPLU/8QUweykamwN++CNAVVGBWLKLFChIP/wZ/XZqVX6YauLaxtXFJzluvoT/GVlnwCN8t9hNw4wVFsk0p2UZe4AlPmWgzBvv/FNB1bCLl6a/U0iWhfgW7t+kypPyaT1pNzZ54ujxm2P8fzbEyznTFaQKNJ8KmMV8VxEFd9VLiJhqYY5rFvEBqtGo8znVRI7uC7STNeJJ73noRUj9IW3K90Cc5D9GR/wJQ+1+DjtkA4QAAAABJRU5ErkJggg==); 162 | width:12px; 163 | height:12px; 164 | margin-left:6px; 165 | margin-right:2px; 166 | background-size: 100% auto; 167 | } 168 | 169 | #urlhider, #reshider{ 170 | width:300px; 171 | height:40px; 172 | } 173 | 174 | 175 | .modal-bar{ 176 | background-color:#E1EBF7; 177 | padding:3px; 178 | margin:0 0 6px 0; 179 | border:1px solid #fff; 180 | border-bottom:1px solid #C6DCF5; 181 | } 182 | 183 | .modal-content{ 184 | position:relative; 185 | margin:0; 186 | padding:0; 187 | height:calc(100% - 40px); 188 | width:100%; 189 | overflow:auto; 190 | } 191 | 192 | 193 | /*#trash section, #marked section{ 194 | display:block !important; 195 | } 196 | */ 197 | 198 | .modal{ 199 | position:fixed; 200 | top:10px; 201 | left:10px; 202 | border:1px solid #047288; 203 | background-color:#fff; 204 | width:calc(100% - 20px); 205 | height:calc(100% - 20px); 206 | overflow:auto; 207 | z-index:888; 208 | box-shadow: 5px 5px 2px #ddd; 209 | } 210 | 211 | #buttons{ 212 | position:relative; 213 | margin:10px 0 5px 10px; 214 | padding:0; 215 | } 216 | 217 | .button,.button-spacer{ 218 | display:inline-block; 219 | height:16px; 220 | padding:0 4px 1px 4px; 221 | font-size:13px; 222 | -webkit-user-select: none; 223 | -moz-user-select: none; 224 | -ms-user-select: none; 225 | } 226 | 227 | 228 | .button-spacer{ 229 | border-left:1px dotted gray; 230 | width:1px; 231 | } 232 | 233 | .button{ 234 | border:1px solid gray; 235 | min-width:60px; 236 | cursor:pointer; 237 | text-align:center; 238 | margin-right:10px; 239 | } 240 | 241 | .button:active{ 242 | opacity:0.6; 243 | } 244 | 245 | /*#save_status{ 246 | display:inline-block; 247 | padding:0; 248 | margin:0; 249 | } 250 | #save_status input{ 251 | display:inline-block; 252 | height:16px; 253 | padding:0 4px 1px 4px; 254 | border: 1px dashed gray; 255 | } 256 | */ 257 | .result-button{ 258 | margin-top:10px; 259 | } 260 | 261 | /*@keyframes button-fadein { 262 | from {opacity:0.5;} 263 | to {opacity:1;} 264 | }*/ 265 | 266 | .button-delayed{ 267 | 268 | -webkit-transition: opacity 0.6s ease-in; 269 | -moz-transition: opacity 0.6s ease-in; 270 | -ms-transition: opacity 0.6s ease-in; 271 | -o-transition: opacity 0.6s ease-in; 272 | transition: opacity 0.6s ease-in; 273 | opacity:0.5; 274 | } 275 | .button-delayed:hover{ 276 | opacity:1; 277 | 278 | } 279 | /*#trash .trash-button{ 280 | opacity:1; 281 | } 282 | */ 283 | #trash .trash-button:before{ 284 | content:"un"; 285 | } 286 | 287 | 288 | #trash-close, #marked-close, #outofscope-close, #vulnerability-close, #nonhtml-close, #notes-close{ 289 | position:absolute; 290 | right:10px; 291 | top:0px; 292 | font-size:22px; 293 | cursor:pointer; 294 | } 295 | #trash-close:hover, #marked-close:hover, #outofscope-close:hover, #vulnerability-close:hover, #nonhtml-close:hover, #notes-close:hover{ 296 | color:#F04158; 297 | } 298 | 299 | 300 | .resbuttons{ 301 | margin-top:10px; 302 | } 303 | 304 | #infos{ 305 | 306 | } 307 | .addregexp{ 308 | 
margin:2px 0 0 0; 309 | font-size:12px; 310 | cursor:pointer; 311 | text-decoration:underline; 312 | } 313 | 314 | .url{ 315 | cursor:pointer; 316 | display:inline-block; 317 | vertical-align: bottom; 318 | 319 | 320 | white-space:nowrap; 321 | 322 | padding:0; 323 | margin:0; 324 | 325 | /*border-right-width: 13px; 326 | border-right-style: solid; 327 | border-image-slice: 1; 328 | border-image:linear-gradient(to right, rgba(255,255,255,0) 0%,rgba(0,0,0,1) 100%);;*/ 329 | } 330 | 331 | #error_container{ 332 | font-size:12px; 333 | color:red; 334 | } 335 | 336 | .url-error{ 337 | color:red; 338 | } 339 | 340 | .url-outofscope{ 341 | color:gray; 342 | } 343 | 344 | 345 | .icon-hidden{ 346 | opacity:0.05 !important; 347 | } 348 | 349 | .result{ 350 | font-size:14px; 351 | } 352 | 353 | 354 | #outofscope p{ 355 | margin: 2px 0 6px 4px; 356 | } 357 | #outofscope p .parent-url{ 358 | padding-left:6px; 359 | } 360 | 361 | #nonhtml p{ 362 | margin: 2px 0 6px 4px; 363 | } 364 | #nonhtml p .parent-url{ 365 | padding-left:6px; 366 | } 367 | 368 | .result-counter{ 369 | display: inline-block; 370 | font-weight:normal; 371 | background-color:#fff; 372 | margin:0; 373 | font-size:9px; 374 | margin:0px 0 -2px -1px; 375 | border:1px solid #047288; 376 | border-radius:6px; 377 | padding:0px 2px 0px 2px; 378 | vertical-align: bottom; 379 | } 380 | 381 | .results{ 382 | margin-bottom:8px; 383 | } 384 | 385 | /*.accordion-open + p{ 386 | color:red; 387 | }*/ 388 | 389 | #vulnerability pre{ 390 | padding:5px; 391 | } 392 | 393 | .vuln-name{ 394 | cursor:pointer; 395 | } 396 | 397 | 398 | span.trigger{ 399 | display: inline-block; 400 | margin:0 4px 0 0; 401 | padding: 1px; 402 | 403 | min-width:12px; 404 | height:12px; 405 | cursor:pointer; 406 | text-align:center; 407 | font-size:13px; 408 | line-height:14px; 409 | color:#02582F; 410 | } 411 | 412 | span.trigger.empty{ 413 | opacity:.1; 414 | cursor:default; 415 | } 416 | 417 | 418 | 419 | #notes textarea{ 420 | display:block; 421 | margin:0 auto; 422 | width:95%; 423 | height:100%; 424 | font-size:14px; 425 | padding:10px; 426 | } -------------------------------------------------------------------------------- /core/lib/database.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | HTCAP - beta 1 5 | Author: filippo.cavallarin@wearesegment.com 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; either version 2 of the License, or (at your option) any later 10 | version. 
11 | """ 12 | import json 13 | import sqlite3 14 | 15 | from core.lib.request import Request 16 | 17 | 18 | class Database: 19 | def __init__(self, dbname): 20 | """ 21 | constructor 22 | 23 | :param dbname: name of the database 24 | """ 25 | self.dbname = dbname 26 | self.conn = None 27 | 28 | def __str__(self): 29 | return self.dbname 30 | 31 | def connect(self): 32 | """ 33 | open connection 34 | """ 35 | self.conn = sqlite3.connect(self.dbname) 36 | self.conn.row_factory = sqlite3.Row 37 | 38 | def close(self): 39 | """ 40 | close connection 41 | """ 42 | self.conn.close() 43 | 44 | def begin(self): 45 | """ 46 | send a "BEGIN TRANSACTION" command 47 | """ 48 | self.conn.isolation_level = None 49 | self.conn.execute(_BEGIN_TRANSACTION_QUERY) 50 | 51 | def commit(self): 52 | """ 53 | commit transaction(s) to the current database 54 | """ 55 | self.conn.commit() 56 | 57 | def initialize(self): 58 | """ 59 | connect, create the base structure, then close the connection 60 | """ 61 | 62 | self.connect() 63 | 64 | cur = self.conn.cursor() 65 | cur.execute(_CREATE_CRAWL_INFO_TABLE_QUERY) 66 | cur.execute(_CREATE_REQUEST_TABLE_QUERY) 67 | cur.execute(_CREATE_REQUEST_INDEX_QUERY) 68 | cur.execute(_CREATE_REQUEST_CHILD_TABLE_QUERY) 69 | cur.execute(_CREATE_REQUEST_CHILD_INDEX_QUERY) 70 | cur.execute(_CREATE_ASSESSMENT_TABLE_QUERY) 71 | cur.execute(_CREATE_VULNERABILITY_TABLE_QUERY) 72 | 73 | self.commit() 74 | self.close() 75 | 76 | def save_crawl_info(self, 77 | htcap_version=None, target=None, start_date=None, commandline=None, 78 | user_agent=None, start_cookies=None): 79 | """ 80 | connect, save the provided crawl info then close the connection 81 | 82 | :param start_cookies: start cookies provided by the user (None means no cookies) 83 | :param htcap_version: version of the running instance of htcap 84 | :param target: start url of the crawl 85 | :param start_date: start date of the crawl 86 | :param commandline: parameter given to htcap for the crawl 87 | :param user_agent: user defined agent 88 | :return: the id of the crawl 89 | """ 90 | values = [htcap_version, target, start_date, commandline, user_agent, 91 | json.dumps([c.get_dict() for c in (start_cookies or [])])] 92 | 93 | insert_query = "INSERT INTO crawl_info (htcap_version,target,start_date,commandline,user_agent,start_cookies) VALUES (?,?,?,?,?,?)" 94 | 95 | self.connect() 96 | cur = self.conn.cursor() 97 | cur.execute(insert_query, values) 98 | cur.execute("SELECT last_insert_rowid() AS id") # retrieve its id 99 | crawl_id = cur.fetchone()['id'] 100 | self.commit() 101 | self.close() 102 | 103 | return crawl_id 104 | 105 | def update_crawl_info(self, crawl_id, crawl_end_date, random_seed, end_cookies): 106 | """ 107 | connect, save the end date, random seed and end cookies, then close the connection 108 | :param crawl_id: 109 | :param crawl_end_date: 110 | :param random_seed: 111 | :param end_cookies: 112 | """ 113 | update_crawl_query = "UPDATE crawl_info SET end_date = ?, random_seed = ?, end_cookies = ? WHERE rowid = ?" 114 | 115 | self.connect() 116 | cur = self.conn.cursor() 117 | cur.execute(update_crawl_query, 118 | [crawl_end_date, random_seed, json.dumps([c.get_dict() for c in end_cookies]), crawl_id]) 119 | self.commit() 120 | self.close() 121 | 122 | def save_request(self, request): 123 | """ 124 | save the given request (do NOT open or close the connection) 125 | 126 | if it is a new request (does not exist in the db), it is inserted. 
127 | if it has a parent request, it is bound to it 128 | 129 | :param request: request to be saved 130 | """ 131 | 132 | insert_values = ( 133 | request.parent_db_id, 134 | request.type, 135 | request.method, 136 | request.url, 137 | request.referer, 138 | request.redirects, 139 | request.data, 140 | json.dumps([r.get_dict() for r in request.cookies]), 141 | request.http_auth if request.http_auth else "", 142 | 1 if request.out_of_scope else 0, 143 | json.dumps(request.trigger) if request.trigger else "", 144 | json.dumps(request.user_output) if len(request.user_output) > 0 else "" 145 | ) 146 | insert_query = "INSERT INTO request (id_parent, type, method, url, referer, redirects, data, cookies, http_auth, out_of_scope, trigger, user_output) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)" 147 | 148 | # referer and cookies are ignored when matching existing requests.. correct? 149 | select_values = ( 150 | request.type, 151 | request.method, 152 | request.url, 153 | request.http_auth if request.http_auth else "", 154 | request.data, 155 | json.dumps(request.trigger) if request.trigger else "" 156 | ) 157 | 158 | # include trigger in query to save the same request with different triggers 159 | # (normally requests are compared using type,method,url and data only) 160 | select_query = "SELECT * FROM request WHERE type=? AND method=? AND url=? AND http_auth=? AND data=? AND trigger=?" 161 | 162 | cur = self.conn.cursor() 163 | cur.execute(select_query, select_values) 164 | existing_req = cur.fetchone() 165 | 166 | if not existing_req: # if no existing request 167 | cur.execute(insert_query, insert_values) # insert the new request 168 | cur.execute("SELECT last_insert_rowid() AS id") # retrieve its id 169 | request.db_id = cur.fetchone()['id'] # complete the request with the db_id 170 | else: 171 | request.db_id = existing_req['id'] # set the db_id for the request 172 | 173 | req_id = request.db_id 174 | 175 | # set the parent-child relationships 176 | if request.parent_db_id: 177 | qry_child = "INSERT INTO request_child (id_request, id_child) VALUES (?,?)" 178 | cur.execute(qry_child, (request.parent_db_id, req_id)) 179 | 180 | def save_crawl_result(self, result, crawled): 181 | """ 182 | save the given result, i.e. update an existing request with the result (do NOT open or close the connection) 183 | 184 | :param result: result to save 185 | :param crawled: (boolean) whether the request has been crawled 186 | """ 187 | qry = "UPDATE request SET crawled=?, crawler_errors=?, user_output=? WHERE id=?" 188 | values = ( 189 | 1 if crawled else 0, 190 | json.dumps(result.errors), 191 | json.dumps(result.request.user_output) if len(result.request.user_output) > 0 else "", 192 | result.request.db_id 193 | ) 194 | 195 | cur = self.conn.cursor() 196 | cur.execute(qry, values) 197 | 198 | def make_request_crawlable(self, request): 199 | """ 200 | mark the request as in scope and not yet crawled 201 | 202 | :param request: 203 | """ 204 | qry = "UPDATE request SET crawled=0, out_of_scope=0 WHERE id=:id" 205 | values = {"id": request.db_id} 206 | 207 | cur = self.conn.cursor() 208 | cur.execute(qry, values) 209 | 210 | def get_requests(self, types="xhr"): 211 | """ 212 | return a list of requests matching the given types 213 | 214 | connect, retrieve the requests list then close the connection 215 | 216 | :param types: string of types (comma separated) 217 | :return: list of matching requests 218 | """ 219 | types = types.split(",") 220 | ret = [] 221 | qry = "SELECT * FROM request WHERE out_of_scope=0 AND type IN (%s)" % ",".join("?" 
* len(types)) 222 | 223 | self.connect() 224 | cur = self.conn.cursor() 225 | cur.execute(qry, types) # nosemgrep 3393760109 226 | for r in cur.fetchall(): 227 | # !! parent must be null (or unset) 228 | req = Request( 229 | r['type'], r['method'], r['url'], referer=r['referer'], data=r['data'], 230 | json_cookies=r['cookies'], db_id=r['id'], parent_db_id=r['id_parent'] 231 | ) 232 | ret.append(req) 233 | self.close() 234 | 235 | return ret 236 | 237 | def create_assessment(self, scanner, date): 238 | """ 239 | connect, create a new assessment then close the connection 240 | :param scanner: 241 | :param date: 242 | :return: id of the newly created assessment 243 | """ 244 | 245 | qry = "INSERT INTO assessment (scanner, start_date) VALUES (?,?)" 246 | 247 | self.connect() 248 | 249 | cur = self.conn.cursor() 250 | 251 | cur.execute(qry, (scanner, date)) 252 | cur.execute("SELECT last_insert_rowid() as id") 253 | assessment_id = cur.fetchone()['id'] 254 | self.commit() 255 | self.close() 256 | return assessment_id 257 | 258 | def save_assessment(self, id_assessment, end_date): 259 | """ 260 | connect, update the existing assessment with the given end date 261 | 262 | :param id_assessment: 263 | :param end_date: 264 | """ 265 | qry = "UPDATE assessment SET end_date=? WHERE id=?" 266 | 267 | self.connect() 268 | cur = self.conn.cursor() 269 | cur.execute(qry, (end_date, id_assessment)) 270 | self.commit() 271 | self.close() 272 | 273 | def insert_vulnerability(self, id_assessment, id_request, type, description, error=""): 274 | """ 275 | connect, create a vulnerability then close the connection 276 | 277 | :param id_assessment: 278 | :param id_request: 279 | :param type: 280 | :param description: 281 | :param error: default="" 282 | """ 283 | qry = "INSERT INTO vulnerability (id_assessment, id_request, type, description, error) VALUES (?,?,?,?,?)" 284 | 285 | self.connect() 286 | 287 | cur = self.conn.cursor() 288 | 289 | cur.execute(qry, (id_assessment, id_request, type, description, error)) 290 | self.commit() 291 | self.close() 292 | 293 | def get_crawled_request(self): 294 | """ 295 | connect, retrieve the already-crawled requests, then close the connection 296 | :return: list of requests 297 | """ 298 | requests = [] 299 | query = "SELECT * FROM request WHERE crawled=1" 300 | 301 | self.connect() 302 | cur = self.conn.cursor() 303 | cur.execute(query) 304 | for request in cur.fetchall(): 305 | req = Request( 306 | request['type'], request['method'], request['url'], referer=request['referer'], data=request['data'], 307 | json_cookies=request['cookies'], db_id=request['id'], parent_db_id=request['id_parent'] 308 | ) 309 | requests.append(req) 310 | self.close() 311 | 312 | return requests 313 | 314 | def get_not_crawled_request(self): 315 | """ 316 | connect, retrieve the not-yet-crawled requests, then close the connection 317 | :return: list of requests 318 | """ 319 | requests = [] 320 | query = "SELECT * FROM request WHERE crawled=0 AND out_of_scope=0" 321 | 322 | self.connect() 323 | cur = self.conn.cursor() 324 | cur.execute(query) 325 | for request in cur.fetchall(): 326 | req = Request( 327 | request['type'], request['method'], request['url'], referer=request['referer'], data=request['data'], 328 | json_cookies=request['cookies'], db_id=request['id'], parent_db_id=request['id_parent'] 329 | ) 330 | requests.append(req) 331 | self.close() 332 | 333 | return requests 334 | 335 | def retrieve_crawl_info(self, crawl_id): 336 | """ 337 | return the information stored for the given crawl 338 | :param 
crawl_id: 339 | :return: tuple (random_seed, end_cookies) 340 | """ 341 | query = "SELECT random_seed, end_cookies FROM crawl_info WHERE rowid=?" 342 | 343 | self.connect() 344 | cur = self.conn.cursor() 345 | cur.execute(query, [crawl_id]) 346 | result = cur.fetchone() 347 | self.close() 348 | 349 | return result["random_seed"], result["end_cookies"] 350 | 351 | 352 | _CREATE_CRAWL_INFO_TABLE_QUERY = """ 353 | CREATE TABLE crawl_info ( 354 | htcap_version TEXT, 355 | target TEXT, 356 | start_date INTEGER, 357 | end_date INTEGER, 358 | commandline TEXT, 359 | user_agent TEXT, 360 | random_seed TEXT, 361 | start_cookies TEXT, 362 | end_cookies TEXT 363 | ) 364 | """ 365 | 366 | _CREATE_REQUEST_TABLE_QUERY = """ 367 | CREATE TABLE request ( 368 | id INTEGER PRIMARY KEY, 369 | id_parent INTEGER, 370 | type TEXT, 371 | method TEXT, 372 | url TEXT, 373 | referer TEXT, 374 | redirects INTEGER, 375 | data TEXT NOT NULL DEFAULT '', 376 | cookies TEXT NOT NULL DEFAULT '[]', 377 | http_auth TEXT, 378 | out_of_scope INTEGER NOT NULL DEFAULT 0, 379 | trigger TEXT, 380 | crawled INTEGER NOT NULL DEFAULT 0, 381 | crawler_errors TEXT, 382 | user_output TEXT 383 | ) 384 | """ 385 | 386 | _CREATE_REQUEST_INDEX_QUERY = """ 387 | CREATE INDEX request_index ON request (type, method, url, http_auth, data, trigger) 388 | """ 389 | 390 | _CREATE_REQUEST_CHILD_TABLE_QUERY = """ 391 | CREATE TABLE request_child ( 392 | id INTEGER PRIMARY KEY, 393 | id_request INTEGER NOT NULL, 394 | id_child INTEGER NOT NULL 395 | ) 396 | """ 397 | 398 | _CREATE_REQUEST_CHILD_INDEX_QUERY = """ 399 | CREATE INDEX request_child_index ON request_child (id_request, id_child) 400 | """ 401 | 402 | _CREATE_ASSESSMENT_TABLE_QUERY = """ 403 | CREATE TABLE assessment( 404 | id INTEGER PRIMARY KEY, 405 | scanner TEXT, 406 | start_date INTEGER, 407 | end_date INTEGER 408 | ) 409 | """ 410 | 411 | _CREATE_VULNERABILITY_TABLE_QUERY = """ 412 | CREATE TABLE vulnerability( 413 | id INTEGER PRIMARY KEY, 414 | id_assessment INTEGER, 415 | id_request INTEGER, 416 | type TEXT, 417 | description TEXT, 418 | error TEXT 419 | ) 420 | """ 421 | 422 | _BEGIN_TRANSACTION_QUERY = """BEGIN TRANSACTION""" 423 | -------------------------------------------------------------------------------- /tests/lib_tests/database_tests.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import unittest 3 | 4 | from mock import MagicMock, call, patch 5 | 6 | from core.lib.database import Database 7 | 8 | 9 | class DatabaseTestCase(unittest.TestCase): 10 | def setUp(self): 11 | self.connection_mock = MagicMock() 12 | self.cursor_mock = MagicMock() 13 | self.cursor_mock.execute = MagicMock() 14 | self.cursor_mock.fetchone = MagicMock() 15 | self.cursor_mock.fetchall = MagicMock(return_value=[]) 16 | self.connection_mock.cursor = MagicMock(return_value=self.cursor_mock) 17 | self.connect_method_mock = MagicMock() 18 | self.commit_method_mock = MagicMock() 19 | self.close_method_mock = MagicMock() 20 | 21 | self.db = Database('my_db') 22 | 23 | self.db.conn = self.connection_mock 24 | self.db.connect = self.connect_method_mock 25 | self.db.commit = self.commit_method_mock 26 | self.db.close = self.close_method_mock 27 | 28 | 29 | class DatabaseTest(DatabaseTestCase): 30 | def test_constructor(self): 31 | db = Database('my_db') 32 | 33 | self.assertEqual(db.dbname, 'my_db') 34 | self.assertEqual(db.conn, None) 35 | 36 | def test___str__(self): 37 | db = Database('my_db') 38 | 39 | self.assertEqual(str(db), 'my_db') 40 | 41 | def 
test_connect(self): 42 | # patch sqlite3.connect so that no real database file is opened 43 | with patch('sqlite3.connect') as connect_mock: 44 | db = Database('my_db') 45 | 46 | db.connect() 47 | 48 | # the real sqlite3.connect is restored once the patch context exits 49 | connect_mock.assert_called_with('my_db') 50 | self.assertEqual(db.conn, connect_mock.return_value) 51 | self.assertEqual(db.conn.row_factory, sqlite3.Row) 52 | 53 | 54 | def test_close(self): 55 | close_mock = MagicMock() 56 | self.connection_mock.close = close_mock 57 | db = Database('my_db') 58 | db.conn = self.connection_mock 59 | 60 | db.close() 61 | 62 | close_mock.assert_called_once() 63 | 64 | def test_begin(self): 65 | self.db.begin() 66 | 67 | self.assertEqual(self.connection_mock.isolation_level, None) 68 | self.connection_mock.execute.assert_called_once_with("BEGIN TRANSACTION") 69 | 70 | def test_commit(self): 71 | self.connection_mock.commit = MagicMock() 72 | db = Database('my_db') 73 | db.conn = self.connection_mock 74 | 75 | db.commit() 76 | 77 | self.connection_mock.commit.assert_called_once() 78 | 79 | def test_initialize_success(self): 80 | self.db.initialize() 81 | 82 | self.connect_method_mock.assert_called_once() 83 | self.assertEqual(self.cursor_mock.execute.call_count, 7) 84 | self.commit_method_mock.assert_called_once() 85 | self.close_method_mock.assert_called_once() 86 | 87 | def test_save_crawl_info(self): 88 | self.cursor_mock.fetchone.return_value = {"id": 42} 89 | 90 | cookie_mock = MagicMock() 91 | cookie_mock.get_dict = MagicMock(return_value="some cookie") 92 | 93 | result = self.db.save_crawl_info( 94 | htcap_version="42.0", target="my target", start_date="my start date", 95 | commandline="my commandline", user_agent="some user agent", start_cookies=[cookie_mock, cookie_mock] 96 | ) 97 | 98 | self.connect_method_mock.assert_called_once() 99 | self.assertEqual(cookie_mock.get_dict.call_count, 2) 100 | self.assertEqual( 101 | self.cursor_mock.execute.call_args_list[0], 102 | call( 103 | "INSERT INTO crawl_info (htcap_version,target,start_date,commandline,user_agent,start_cookies) VALUES (?,?,?,?,?,?)", 104 | ["42.0", "my target", "my start date", 105 | "my commandline", "some user agent", '["some cookie", "some cookie"]'])) 106 | self.assertEqual( 107 | self.cursor_mock.execute.call_args_list[1], 108 | call( 109 | "SELECT last_insert_rowid() AS id" 110 | ) 111 | ) 112 | self.commit_method_mock.assert_called_once() 113 | self.close_method_mock.assert_called_once() 114 | self.assertEqual(result, 42) 115 | 116 | def test_update_crawl_info(self): 117 | cookie_mock = MagicMock() 118 | cookie_mock.get_dict = MagicMock(return_value="some cookie") 119 | 120 | self.db.update_crawl_info(53, "some end date", "my random seed", [cookie_mock, cookie_mock]) 121 | 122 | self.connect_method_mock.assert_called_once() 123 | self.assertEqual(cookie_mock.get_dict.call_count, 2) 124 | self.cursor_mock.execute.assert_called_once_with( 125 | "UPDATE crawl_info SET end_date = ?, random_seed = ?, end_cookies = ? 
WHERE rowid = ?", 126 | ["some end date", "my random seed", '["some cookie", "some cookie"]', 53]) 127 | self.commit_method_mock.assert_called_once() 128 | self.close_method_mock.assert_called_once() 129 | 130 | def test_save_request_new_request_no_parent(self): 131 | """ 132 | case where the request is new and has no parent 133 | """ 134 | fetchone_returns = [None, {'id': 42}] 135 | 136 | def fetchone_side_effect(): 137 | result = fetchone_returns.pop(0) 138 | return result 139 | 140 | request = MagicMock() 141 | request.parent_db_id = None 142 | request.type = "request type" 143 | request.method = "METHOD" 144 | request.url = "my url" 145 | request.referer = "some referrer" 146 | request.redirects = "some redirection" 147 | request.data = "some data" 148 | request.cookies = {} 149 | request.http_auth = None 150 | request.out_of_scope = False 151 | request.trigger = None 152 | request.user_output = [] 153 | 154 | self.cursor_mock.fetchone.side_effect = fetchone_side_effect 155 | 156 | self.db.save_request(request) 157 | 158 | self.assertEqual(self.cursor_mock.execute.call_count, 3) 159 | self.assertEqual( 160 | self.cursor_mock.execute.call_args_list[0], 161 | call( 162 | 'SELECT * FROM request WHERE type=? AND method=? AND url=? AND http_auth=? AND data=? AND trigger=?', 163 | ("request type", "METHOD", "my url", "", "some data", "") 164 | ) 165 | ) 166 | self.assertEqual( 167 | self.cursor_mock.execute.call_args_list[1], 168 | call( 169 | 'INSERT INTO request (id_parent, type, method, url, referer, redirects, data, cookies, http_auth, out_of_scope, trigger, user_output) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)', 170 | ( 171 | None, "request type", "METHOD", "my url", "some referrer", "some redirection", "some data", "[]", 172 | "", 0, 173 | "", "") 174 | ) 175 | ) 176 | self.assertEqual( 177 | self.cursor_mock.execute.call_args_list[2], 178 | call( 179 | "SELECT last_insert_rowid() AS id" 180 | ) 181 | ) 182 | 183 | def test_save_request_old_request_with_parent(self): 184 | """ 185 | case where the request already exists and has a parent 186 | """ 187 | 188 | cookie_mock = MagicMock() 189 | cookie_mock.get_dict = MagicMock(return_value={"cookie_value_1": "value1"}) 190 | 191 | request = MagicMock() 192 | request.parent_db_id = 42 193 | request.type = "request type" 194 | request.method = "METHOD" 195 | request.url = "my url" 196 | request.referer = "some referrer" 197 | request.redirects = "some redirection" 198 | request.data = "some data" 199 | request.cookies = [cookie_mock] 200 | request.http_auth = "auth" 201 | request.out_of_scope = True 202 | request.trigger = ["trigger1", "trigger2"] 203 | request.html = "" 204 | request.user_output = ['some', 'output'] 205 | 206 | self.cursor_mock.fetchone.return_value = {"id": 53} 207 | 208 | self.db.save_request(request) 209 | 210 | self.assertEqual(request.db_id, 53) 211 | self.assertEqual(self.cursor_mock.execute.call_count, 2) 212 | self.assertEqual( 213 | self.cursor_mock.execute.call_args_list[0], 214 | call( 215 | 'SELECT * FROM request WHERE type=? AND method=? AND url=? AND http_auth=? AND data=? 
AND trigger=?', 216 | ("request type", "METHOD", "my url", "auth", "some data", '["trigger1", "trigger2"]') 217 | ) 218 | ) 219 | self.assertEqual( 220 | self.cursor_mock.execute.call_args_list[1], 221 | call( 222 | "INSERT INTO request_child (id_request, id_child) VALUES (?,?)", 223 | (42, 53) 224 | ) 225 | ) 226 | 227 | def test_save_crawl_result_not_crawled(self): 228 | result = MagicMock() 229 | result.errors = [] 230 | result.request = MagicMock() 231 | result.request.user_output = [] 232 | result.request.db_id = 42 233 | 234 | self.db.save_crawl_result(result=result, crawled=None) 235 | 236 | self.cursor_mock.execute.assert_called_once_with( 237 | "UPDATE request SET crawled=?, crawler_errors=?, user_output=? WHERE id=?", 238 | (0, "[]", "", 42) 239 | ) 240 | 241 | def test_save_crawl_result_crawled(self): 242 | result = MagicMock() 243 | result.errors = ["some", "errors"] 244 | result.request = MagicMock() 245 | result.request.user_output = ["some", "outputs"] 246 | result.request.db_id = 42 247 | 248 | self.db.save_crawl_result(result=result, crawled=True) 249 | 250 | self.cursor_mock.execute.assert_called_once_with( 251 | "UPDATE request SET crawled=?, crawler_errors=?, user_output=? WHERE id=?", 252 | (1, '["some", "errors"]', '["some", "outputs"]', 42) 253 | ) 254 | 255 | def test_make_request_crawlable(self): 256 | request = MagicMock() 257 | request.db_id = 42 258 | 259 | self.db.make_request_crawlable(request) 260 | 261 | self.cursor_mock.execute.assert_called_once_with( 262 | "UPDATE request SET crawled=0, out_of_scope=0 WHERE id=:id", 263 | {"id": 42} 264 | ) 265 | 266 | def test_get_requests_without_result(self): 267 | results = self.db.get_requests() 268 | 269 | self.connect_method_mock.assert_called_once() 270 | self.cursor_mock.execute.assert_called_once_with( 271 | "SELECT * FROM request WHERE out_of_scope=0 AND type IN (?)", 272 | ["xhr"] 273 | ) 274 | self.close_method_mock.assert_called_once() 275 | self.assertEqual(results, []) 276 | 277 | @patch('core.lib.database.Request') 278 | def test_get_requests_with_result(self, request_mock): 279 | self.cursor_mock.fetchall.return_value = [ 280 | { 281 | "id": 42, "id_parent": 53, 282 | "type": "my type", "method": "METHOD", "url": "some url", 283 | "referer": "from here", "data": "some data", "cookies": "some cookies" 284 | } 285 | ] 286 | 287 | self.db.get_requests("xhr,an_other_type") 288 | 289 | request_mock.assert_called_once_with( 290 | "my type", "METHOD", "some url", data="some data", db_id=42, 291 | json_cookies="some cookies", parent_db_id=53, 292 | referer="from here" 293 | ) 294 | 295 | def test_create_assessment(self): 296 | self.cursor_mock.fetchone.return_value = {"id": 42} 297 | 298 | result = self.db.create_assessment('my scanner', 'start date') 299 | 300 | self.connect_method_mock.assert_called_once() 301 | 302 | self.assertEqual(self.cursor_mock.execute.call_count, 2) 303 | self.assertEqual( 304 | self.cursor_mock.execute.call_args_list[0], 305 | call( 306 | "INSERT INTO assessment (scanner, start_date) VALUES (?,?)", 307 | ("my scanner", "start date") 308 | ) 309 | ) 310 | self.assertEqual( 311 | self.cursor_mock.execute.call_args_list[1], 312 | call( 313 | "SELECT last_insert_rowid() as id" 314 | ) 315 | ) 316 | self.assertEqual(result, 42) 317 | self.commit_method_mock.assert_called_once() 318 | self.close_method_mock.assert_called_once() 319 | 320 | def test_save_assessment(self): 321 | self.db.save_assessment(42, "end date") 322 | 323 | self.connect_method_mock.assert_called_once() 324 | 
self.cursor_mock.execute.assert_called_once_with( 325 | "UPDATE assessment SET end_date=? WHERE id=?", 326 | ("end date", 42) 327 | ) 328 | self.commit_method_mock.assert_called_once() 329 | self.close_method_mock.assert_called_once() 330 | 331 | def test_insert_vulnerability(self): 332 | self.db.insert_vulnerability(42, 53, "my type", "my description") 333 | 334 | self.connect_method_mock.assert_called_once() 335 | self.cursor_mock.execute.assert_called_once_with( 336 | "INSERT INTO vulnerability (id_assessment, id_request, type, description, error) VALUES (?,?,?,?,?)", 337 | (42, 53, "my type", "my description", "") 338 | ) 339 | self.commit_method_mock.assert_called_once() 340 | self.close_method_mock.assert_called_once() 341 | 342 | @patch('core.lib.database.Request') 343 | def test_get_crawled_request(self, request_mock): 344 | self.cursor_mock.fetchall.return_value = [ 345 | { 346 | "id": 42, "id_parent": 53, 347 | "type": "my type", "method": "METHOD", "url": "some url", 348 | "referer": "from here", "data": "some data", "cookies": "some cookies" 349 | } 350 | ] 351 | results = self.db.get_crawled_request() 352 | 353 | self.connect_method_mock.assert_called_once() 354 | self.cursor_mock.execute.assert_called_once_with( 355 | "SELECT * FROM request WHERE crawled=1" 356 | ) 357 | request_mock.assert_called_once_with( 358 | "my type", "METHOD", "some url", data="some data", db_id=42, 359 | json_cookies="some cookies", parent_db_id=53, 360 | referer="from here" 361 | ) 362 | self.close_method_mock.assert_called_once() 363 | self.assertEqual(len(results), 1) 364 | 365 | @patch('core.lib.database.Request') 366 | def test_get_not_crawled_request(self, request_mock): 367 | self.cursor_mock.fetchall.return_value = [ 368 | { 369 | "id": 42, "id_parent": 53, 370 | "type": "my type", "method": "METHOD", "url": "some url", 371 | "referer": "from here", "data": "some data", "cookies": "some cookies" 372 | } 373 | ] 374 | results = self.db.get_not_crawled_request() 375 | 376 | self.connect_method_mock.assert_called_once() 377 | self.cursor_mock.execute.assert_called_once_with( 378 | "SELECT * FROM request WHERE crawled=0 AND out_of_scope=0" 379 | ) 380 | request_mock.assert_called_once_with( 381 | "my type", "METHOD", "some url", data="some data", db_id=42, 382 | json_cookies="some cookies", parent_db_id=53, 383 | referer="from here" 384 | ) 385 | self.close_method_mock.assert_called_once() 386 | self.assertEqual(len(results), 1) 387 | 388 | def test_retrieve_crawl_info(self): 389 | self.cursor_mock.fetchone.return_value = {"random_seed": "my seed", "end_cookies": "my end cookies"} 390 | 391 | results = self.db.retrieve_crawl_info(42) 392 | 393 | self.connect_method_mock.assert_called_once() 394 | self.cursor_mock.execute.assert_called_once_with( 395 | "SELECT random_seed, end_cookies FROM crawl_info WHERE rowid=?", [42] 396 | ) 397 | self.close_method_mock.assert_called_once() 398 | self.assertEqual(results, ("my seed", "my end cookies")) 399 | --------------------------------------------------------------------------------
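A minimal usage sketch of the Database class from core/lib/database.py above. The database filename, URL and request values are hypothetical, and the positional Request("type", "METHOD", "url") form is inferred from how get_requests() constructs Request objects; the real constructor in core/lib/request.py may accept additional arguments and is assumed to initialise cookies and user_output to empty containers, as save_request() expects.

# -*- coding: utf-8 -*-
from core.lib.database import Database
from core.lib.request import Request

db = Database("crawl.db")  # hypothetical database file
db.initialize()  # connects, creates all tables, commits and closes

# save_crawl_info() manages its own connection and returns the crawl id
crawl_id = db.save_crawl_info(htcap_version="1.0", target="http://example.com/",
                              start_date=0, commandline="htcap crawl example.com",
                              user_agent="htcap")

# save_request() does NOT manage the connection, so wrap it explicitly
db.connect()
db.begin()
db.save_request(Request("link", "GET", "http://example.com/page"))  # hypothetical request
db.commit()
db.close()

# get_requests() reconnects internally and returns the in-scope
# requests whose type is in the given comma-separated list
for req in db.get_requests("link,xhr"):
    print(req.url)

Note the split in connection handling: the crawl-info, assessment and retrieval helpers each open and close their own connection, while save_request() and save_crawl_result() deliberately leave it to the caller so a crawler thread can batch many writes inside one BEGIN TRANSACTION / commit cycle.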