├── .gitignore
├── BappDescription.html
├── BappManifest.bmf
├── BurpSmartBuster.py
├── DemoLabs - BurpSmartBuster - DEF CON 2016.pdf
├── DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf
├── LICENSE.md
├── README.md
├── bsb.ini
├── data.json
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.pyc
3 | 


--------------------------------------------------------------------------------
/BappDescription.html:
--------------------------------------------------------------------------------
 1 | <p>This is a Burp Suite extension which discover content with a smart touch. A
 2 | bit like "DirBuster" and "Burp Discover Content", but smarter and being
 3 | integrated into Burp Suite this plugin looks at words in pages, the domain name,
 4 | the current directories and filename to help you find hidden files, directories
 5 | and information you usually don't with a static dictionary file that brute
 6 | forces its way on the web server.</p>
 7 | 
 8 | <p>For more information, please refer to
 9 | <a href="https://github.com/pathetiq/BurpSmartBuster">
10 |   https://github.com/pathetiq/BurpSmartBuster</a></p>
11 | 


--------------------------------------------------------------------------------
/BappManifest.bmf:
--------------------------------------------------------------------------------
 1 | Uuid: 7044ef35fa5a49b39285e101a79bf4ae
 2 | ExtensionType: 2
 3 | Name: BurpSmartBuster
 4 | RepoName: burp-smart-buster
 5 | ScreenVersion: 0.2                                                                                                      
 6 | SerialVersion: 3
 7 | MinPlatformVersion: 2                                                                                                    
 8 | ProOnly: False
 9 | Author: Patrick Mathieu @pathetiq
10 | ShortDescription: Looks for files, directories and file extensions based on current requests received by Burp Suite
11 | EntryPoint: BurpSmartBuster.py
12 | 


--------------------------------------------------------------------------------
/BurpSmartBuster.py:
--------------------------------------------------------------------------------
   1 | # -*- coding: utf-8 -*-
   2 | '''
   3 | Created on 2015-02-22
   4 | 
   5 | BurpSmartBuster
   6 | @author: @pathetiq
   7 | @thanks: Abhineet & @theguly
   8 | @version: 0.3
   9 | @summary: This is a Burp Suite extension which discover content with a smart touch. A bit like “DirBuster” and “Burp Discover Content”,
  10 |           but smarter and being integrated into Burp Suite this plugin looks at words in pages, the domain name, the current directories and filename
  11 |           to help you find hidden files, directories and information you usually don't with a static dictionary file that brute force its way on the web server.
  12 | 
  13 | @bug: URL with variable, no file, no extension or weird variable separate by ; :, etc. breaks the directories/files listing
  14 | @todo: technology detection and scanning, community files, add 404 detection in output, threads speeds and adjustments
  15 | @todo: Add results to an issue. add tested files somewhere, add found file to sitemap.
  16 | 
  17 | '''
  18 | import os
  19 | os.environ["NLTK_DATA"] = os.path.join(os.getcwd(), "nltk_data")
  20 | 
  21 | #sys imports
  22 | import sys
  23 | 
  24 | #Find the jython path where our prerequisites packages are installed
  25 | import site
  26 | for site in site.getsitepackages():
  27 |     sys.path.append(site)
  28 | #Examples of paths if needed
  29 | #sys.path.append("/home/USERNAME/.local/lib/python2.7/site-packages/")
  30 | #sys.path.append("/usr/local/lib/python2.7/site-packages")
  31 | ##sys.path.append("/usr/lib/python2.7/dist-packages/")
  32 | #sys.path.append("/home/USERNAME/Documents/Apps/TextBlob")
  33 | #sys.path.append("/home/USERNAME/Documents/Apps/nltk")
  34 | 
  35 | #burp imports
  36 | from burp import IBurpExtender
  37 | from burp import IScanIssue
  38 | from burp import IScannerCheck
  39 | from burp import IScannerInsertionPoint
  40 | from burp import IHttpListener
  41 | from burp import IBurpExtenderCallbacks
  42 | 
  43 | #UI Import
  44 | from burp import IContextMenuFactory
  45 | from java.util import List, ArrayList
  46 | from burp import ITab
  47 | from javax.swing import JPanel, JLabel, JMenuItem, JTextField, JList, DefaultListModel, JButton, JFileChooser
  48 | from javax.swing import JScrollPane, ListSelectionModel, GroupLayout, ButtonGroup, JRadioButton
  49 | from java.awt import Dimension
  50 | from java.awt import Toolkit
  51 | from java.awt.datatransfer import StringSelection
  52 | 
  53 | #utils imports
  54 | from array import array
  55 | from java.io import PrintWriter
  56 | from java.net import URL
  57 | import os
  58 | import ConfigParser
  59 | import json
  60 | import logging
  61 | from tld import get_tld
  62 | import hashlib
  63 | import random
  64 | 
  65 | #spidering
  66 | from bs4 import BeautifulSoup
  67 | import Queue
  68 | 
  69 | #Parse HTML comments
  70 | from bs4 import Comment
  71 | import re
  72 | from urlparse import urlparse
  73 | 
  74 | #requester
  75 | import requests
  76 | import csv
  77 | from collections import deque
  78 | import threading
  79 | 
  80 | #text tokenization & natural language lib
  81 | locals()
  82 | #TODO: REVALIDATE the following : file /usr/local/lib/python2.7/dist-packages/nltk/internals.py line 902 has been change to remove os.getgroups() to compile in Burp...Jhython?
  83 | #http://textminingonline.com/getting-started-with-textblob
  84 | from textblob import TextBlob
  85 | 
  86 | 
  87 | 
  88 | 
  89 | '''----------------------------------------------------------------------------------------------------------------------------------------
  90 | BurpSmartBuster Logging object and config
  91 | ----------------------------------------------------------------------------------------------------------------------------------------'''
  92 | class Logger():
  93 | 
  94 |     LOG_FILENAME = 'BSB.log'
  95 |     DEFAULT_LEVEL = logging.DEBUG
  96 | 
  97 |     def __init__(self,name=LOG_FILENAME,level=DEFAULT_LEVEL):
  98 | 
  99 |         #define configs
 100 |         self._default_level=level
 101 |         self._name = name
 102 |         print "Log file is: " + name
 103 | 
 104 |         logging.basicConfig(filename=self._name+".log",
 105 |                             level=self._default_level,
 106 |                             format="%(asctime)s - [%(levelname)s] [%(threadName)s] (%(funcName)s:%(lineno)d) %(message)s",
 107 |                             )
 108 | 
 109 |         self._logger = logging.getLogger(name)
 110 |         return
 111 | 
 112 |     def getLogger(self):
 113 |         return self._logger
 114 | 
 115 | 
 116 | '''----------------------------------------------------------------------------------------------------------------------------------------
 117 | BurpSmartBuster main class (BurpExtender)
 118 | ----------------------------------------------------------------------------------------------------------------------------------------'''
 119 | class BurpExtender(IBurpExtender, IScanIssue, IScannerCheck, IScannerInsertionPoint,IHttpListener, IBurpExtenderCallbacks, IContextMenuFactory, ITab):
 120 | 
 121 |     # definitions
 122 |     EXTENSION_NAME = "BurpSmartBuster"
 123 |     AUTHOR = "@pathetiq"
 124 | 
 125 |     def registerExtenderCallbacks(self, callbacks):
 126 |         # keep a reference to our callbacks object
 127 |         self._callbacks = callbacks
 128 | 
 129 |         # obtain an extension helpers object
 130 |         self._helpers = callbacks.getHelpers()
 131 | 
 132 |         # define stdout writer
 133 |         self._stdout = PrintWriter(callbacks.getStdout(), True)
 134 | 
 135 |         print(self.EXTENSION_NAME + ' by ' + self.AUTHOR)
 136 |         print('================================')
 137 |         print('This extension will create new requests for ALL "in scope" HTTP request made through Burp. Make sure to filter scope items')
 138 |         print('For help or any information see the github page or contact the author on twitter.')
 139 |         print('Note: The Spider currently only supports English, see author github page for new language installation instructions')
 140 | 
 141 |         # set our extension name
 142 |         callbacks.setExtensionName(self.EXTENSION_NAME)
 143 |         callbacks.registerScannerCheck(self)
 144 |         callbacks.registerHttpListener(self)
 145 |         callbacks.registerContextMenuFactory(self)
 146 | 
 147 |         #Initialize tab details
 148 | 
 149 |         #fields of options setBounds(x,y,width,heigth)
 150 |         self.verboseLabel = JLabel("Verbose")
 151 |         self.verboseLabel.setBounds(10,10,130,30)
 152 | 
 153 |         self.yesVerboseButton = JRadioButton("Yes")
 154 |         self.yesVerboseButton.setSelected(True)
 155 |         self.yesVerboseButton.setBounds(10,40,50,30)
 156 |         self.noVerboseButton = JRadioButton("No")
 157 |         self.noVerboseButton.setBounds(70,40,50,30)
 158 | 
 159 |         self.buttonGroup = ButtonGroup()
 160 |         self.buttonGroup.add(self.yesVerboseButton)
 161 |         self.buttonGroup.add(self.noVerboseButton)
 162 | 
 163 |         self.spiderPagesLabel = JLabel("Spider: Nbr of pages")
 164 |         self.spiderPagesLabel.setBounds(10,70,200,30)
 165 |         self.spiderPagesTextField = JTextField(300)
 166 |         self.spiderPagesTextField.setText("5")
 167 |         self.spiderPagesTextField.setBounds(10,100,300,30)
 168 |         self.spiderPagesTextField.setPreferredSize( Dimension( 250, 20 ) )
 169 | 
 170 |         self.spiderRecPagesLabel = JLabel("Recursive: Nbr of pages")
 171 |         self.spiderRecPagesLabel.setBounds(10,130,250,30)
 172 |         self.spiderRecPagesTextField = JTextField(300)
 173 |         self.spiderRecPagesTextField.setText("3")
 174 |         self.spiderRecPagesTextField.setBounds(10,160,300,30)
 175 |         self.spiderRecPagesTextField.setPreferredSize( Dimension( 250, 20 ) )
 176 | 
 177 |         self.fileTypeLabel = JLabel("Ignore Filetypes")
 178 |         self.fileTypeLabel.setBounds(10,190,130,30)
 179 |         self.fileTypeTextField = JTextField(300)
 180 |         self.fileTypeTextField.setText("gif,jpg,png,css,js,ico,woff")
 181 |         self.fileTypeTextField.setBounds(10,220,300,30)
 182 |         self.fileTypeTextField.setPreferredSize( Dimension( 250, 20 ) )
 183 | 
 184 |         self.inScopeLabel = JLabel("Scan in-scope URLs only?")
 185 |         self.inScopeLabel.setBounds(10,250,200 ,30)
 186 | 
 187 |         self.yesInScopeButton = JRadioButton("Yes")
 188 |         self.yesInScopeButton.setBounds(10,280,50,30)
 189 |         self.yesInScopeButton.setSelected(True)
 190 |         self.noInScopeButton = JRadioButton("No")
 191 |         self.noInScopeButton.setBounds(70,280,50,30)
 192 | 
 193 |         self.buttonGroup1 = ButtonGroup()
 194 |         self.buttonGroup1.add(self.yesInScopeButton)
 195 |         self.buttonGroup1.add(self.noInScopeButton)
 196 | 
 197 |         self.refreshConfigButton = JButton("Update Configuration", actionPerformed=self.updateConfig)
 198 |         self.refreshConfigButton.setBounds(10,310,200,30)
 199 | 
 200 |         #Jlist to contain the results
 201 |         self.list = JList([])
 202 |         self.list.setSelectionMode(ListSelectionModel.MULTIPLE_INTERVAL_SELECTION)
 203 |         self.list.setLayoutOrientation(JList.VERTICAL)
 204 |         self.list.setVisibleRowCount(-1)
 205 |         self.listScroller = JScrollPane(self.list,JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED)
 206 |         self.listScroller.setBounds(510,40,500,500)
 207 |         #self.listScroller.setPreferredSize(Dimension(400, 500))
 208 | 
 209 |         self.urlFoundLabel = JLabel("URLs Found")
 210 |         self.urlFoundLabel.setBounds(510,10,130,30)
 211 |         self.listScroller.setPreferredSize(Dimension(500, 100))
 212 |         self.listScroller.setViewportView(self.list)
 213 | 
 214 |         self.clearListButton = JButton("Clear list", actionPerformed=self.clearList)
 215 |         self.clearListButton.setBounds(350,40,150,30)
 216 | 
 217 |         self.copyListButton = JButton("Copy Selected", actionPerformed=self.copyList)
 218 |         self.copyListButton.setBounds(350,70,150,30)
 219 | 
 220 |         self.deleteListButton = JButton("Delete Selected", actionPerformed=self.deleteSelected)
 221 |         self.deleteListButton.setBounds(350,100,150,30)
 222 | 
 223 |         self.exportListButton = JButton("Export list", actionPerformed=self.exportList)
 224 |         self.exportListButton.setBounds(350,130,150,30)
 225 | 
 226 | 
 227 |         #main panel
 228 |         self.mainpanel = JPanel()
 229 |         self.mainpanel.setLayout(None)
 230 | 
 231 |         self.mainpanel.add(self.verboseLabel)
 232 |         self.mainpanel.add(self.yesVerboseButton)
 233 |         self.mainpanel.add(self.noVerboseButton)
 234 |         self.mainpanel.add(self.spiderPagesLabel)
 235 |         self.mainpanel.add(self.spiderPagesTextField)
 236 |         self.mainpanel.add(self.spiderRecPagesLabel)
 237 |         self.mainpanel.add(self.spiderRecPagesTextField)
 238 |         self.mainpanel.add(self.fileTypeLabel)
 239 |         self.mainpanel.add(self.fileTypeTextField)
 240 |         self.mainpanel.add(self.inScopeLabel)
 241 |         self.mainpanel.add(self.yesInScopeButton)
 242 |         self.mainpanel.add(self.noInScopeButton)
 243 |         self.mainpanel.add(self.refreshConfigButton)
 244 |         self.mainpanel.add(self.urlFoundLabel)
 245 |         self.mainpanel.add(self.listScroller)
 246 |         self.mainpanel.add(self.clearListButton)
 247 |         self.mainpanel.add(self.copyListButton)
 248 |         self.mainpanel.add(self.deleteListButton)
 249 |         self.mainpanel.add(self.exportListButton)
 250 | 
 251 |         callbacks.customizeUiComponent(self.mainpanel)
 252 |         callbacks.addSuiteTab(self)
 253 | 
 254 |         #set default config file name and values
 255 | 
 256 |         #only smart is use, keeping other for future development
 257 |         self._configSmart_Local = False
 258 |         self._configSmart_Smart = True
 259 |         self._configSmart_File = False
 260 |         self._configSmart_Spider = False
 261 |         self._trailingSlash = True
 262 | 
 263 |         #To be fetch from the UI settings
 264 |         self._configSpider_NumberOfPages = 5
 265 |         self._verbose = False
 266 |         self._ignoreFileType = ["gif","jpg","png","css","js","ico","woff"]
 267 |         #keeping to use it
 268 |         self._configInScope_only = True
 269 |         self._configSpider_NumberOfPages = 5
 270 | 
 271 |         #Get a logger object for logging into file
 272 |         loggerTemp = Logger(self.EXTENSION_NAME,logging.DEBUG)
 273 |         self._logger= loggerTemp.getLogger()
 274 | 
 275 |         #get the config file, will overwrite default config if the ini file is different
 276 |         #self.getSmartConfiguration()
 277 | 
 278 |         #get config from the UI
 279 |         self.updateConfig("")
 280 | 
 281 |         #words gather on the page from the spidering
 282 |         self._words = {}
 283 |         self._mergedWords = {}
 284 | 
 285 |         #robots.txt list
 286 |         self._robots = {}
 287 |         self._robotsScanned = {}
 288 | 
 289 |         #sitemap.xml list
 290 |         self._sitemap = {}
 291 | 
 292 |         #url in comments
 293 |         self._urlsInComment = {}
 294 | 
 295 |         #domain names to query current url/path/files for hidden items
 296 |         self._smartDomain = {}
 297 | 
 298 |         #sitemap and robots scanned once
 299 |         self._siteRobotScanned = {}
 300 | 
 301 |         #Load our BSB json data
 302 |         self._jsonFile = "data.json"
 303 |         jsonfile = open(self._jsonFile)
 304 |         self._parsed_json = json.load(jsonfile)
 305 |         jsonfile.close()
 306 | 
 307 |         #define the request object to use each time we need to call a URL
 308 |         self._requestor = Requestor(self._logger,self)
 309 | 
 310 |         #Variable to define if unique data has already been grabbed
 311 |         self._smartRequestData = {}
 312 |         self._smartRequestPath = {}
 313 |         self._smartRequestFiles = {}
 314 |         #number of time the spider have run
 315 |         self._spiderRan = {} #Array of domain. If domain exist. Spider did ran!
 316 | 
 317 |         return
 318 | 
 319 |     '''
 320 |     Graphic Functions
 321 |     '''
 322 |     def createMenuItems(self, contextMenuInvocation):
 323 |         self._contextMenuData = contextMenuInvocation.getSelectedMessages()
 324 |         menu_list = ArrayList()
 325 |         menu_list.add(JMenuItem("Send to BurpSmartBuster",actionPerformed=self.menuItemClicked))
 326 |         return menu_list
 327 | 
 328 |     def menuItemClicked(self, event):
 329 |         data = self.getURLdata(self._contextMenuData[0],True)
 330 |         self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString())
 331 |         self._logger.debug("Executing: smartRequest() from menuItemClicked")
 332 |         thread = threading.Thread(
 333 |             target=self.smartRequest,
 334 |             name="Thread-smartRequest",
 335 |             args=[data],)
 336 |         thread.start()
 337 | 
 338 |     # Implement ITab
 339 |     def getTabCaption(self):
 340 |         return self.EXTENSION_NAME
 341 | 
 342 |     # Return our panel and button we setup. Components of our extension's tab
 343 |     def getUiComponent(self):
 344 |         return self.mainpanel
 345 | 
 346 |     '''------------------------------------------------
 347 |     Extension Unloaded
 348 |     ------------------------------------------------'''
 349 |     def extensionUnloaded(self):
 350 |         self._logger.info("Extension was unloaded")
 351 |         return
 352 | 
 353 |     '''------------------------------------------------
 354 |     VERBOSE FUNCTION
 355 | 
 356 |     Display each tested URL
 357 |     ------------------------------------------------'''
 358 |     def verbose(self,text):
 359 |         #Is verbose on or off from config file?
 360 |         if self._verbose == True:
 361 |             print "[VERBOSE]: "+text
 362 |         return
 363 | 
 364 |     '''------------------------------------------------
 365 |     GRAPHICAL FUNCTIONS for BUTTONS
 366 |     ------------------------------------------------'''
 367 | 
 368 |     def getRecursiveConfig(self):
 369 |         return int(self.spiderRecPagesTextField.getText())
 370 | 
 371 |     #refresh the config from the UI
 372 |     def updateConfig(self,meh):
 373 |         self._configSpider_NumberOfPages = int(self.spiderPagesTextField.getText())
 374 | 
 375 |         if self.yesVerboseButton.isSelected():
 376 |             self._verbose = True
 377 |         else:
 378 |             self._verbose = False
 379 | 
 380 |         if self.yesInScopeButton.isSelected():
 381 |             self._configInScope_only = True
 382 |         else:
 383 |             self._configInScope_only = False
 384 | 
 385 |         fileType = []
 386 |         fileTypeStr = self.fileTypeTextField.getText()
 387 |         self._ignoreFileType = self.fileTypeTextField.getText().split(",")
 388 | 
 389 |         self._logger.info("Config changed: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType))
 390 |         print "Now using config: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType)
 391 | 
 392 |         return
 393 | 
 394 |     #add a URL to the list
 395 |     def addURL(self,url):
 396 |         list = self.getListData()
 397 |         list.append(url)
 398 | 
 399 |         self.list.setListData(list)
 400 |         return
 401 | 
 402 |     #return the who list
 403 |     def getListData(self):
 404 |         list = []
 405 | 
 406 |         for i in range(0, self.list.getModel().getSize()):
 407 |             list.append(self.list.getModel().getElementAt(i))
 408 | 
 409 |         return list
 410 | 
 411 |     #Clear the list
 412 |     def clearList(self,meh):
 413 |         self.list.setListData([])
 414 |         return
 415 | 
 416 |     #Copy to clipboard
 417 |     def copyList(self,meh):
 418 |         clipboard = Toolkit.getDefaultToolkit().getSystemClipboard()
 419 |         list = self.getListData()
 420 |         selected = self.list.getSelectedIndices().tolist()
 421 | 
 422 |         copied = ""
 423 |         urls = ""
 424 |         for i in selected:
 425 |             url = str(list[i]).split(',')[0]
 426 |             urls = urls+str(url)+"\n"
 427 | 
 428 |         clipboard.setContents(StringSelection(urls), None)
 429 | 
 430 |         return
 431 | 
 432 |     #Delete selected item from the list
 433 |     def deleteSelected(self,meh):
 434 |         x = self.list.getSelectedIndices().tolist()
 435 |         list = self.getListData()
 436 | 
 437 |         for i in reversed(x):
 438 |             del list[i]
 439 | 
 440 |         self.list.setListData(list)
 441 |         return
 442 | 
 443 |     #TODO: save as the list
 444 |     def exportList(self,meh):
 445 |         fd = JFileChooser()
 446 |         dialog = fd.showDialog(self.mainpanel, "Save List As")
 447 | 
 448 |         dataList = self.getListData()
 449 | 
 450 |         urls = ""
 451 | 
 452 |         if dialog == JFileChooser.APPROVE_OPTION:
 453 |             file = fd.getSelectedFile()
 454 |             path = file.getCanonicalPath()
 455 | 
 456 |             try:
 457 |                 with open(path, 'w') as exportFile:
 458 |                     for item in dataList:
 459 |                         url = str(item).split(',')[0]
 460 |                         exportFile.write(url+"\n")
 461 |             except IOError as e:
 462 |                 print "Error exporting list: " + str(e)
 463 |                 self._logger.debug("Error exporting list to: " + path + ", Error: " + str(e))
 464 | 
 465 |         return
 466 | 
 467 |     '''------------------------------------------------------------------------------------------------
 468 |     MAIN FUNCTION / WHERE EVERYTHING STARTS
 469 | 
 470 |     For every request which isn't created from the Extender(this might have to be change)
 471 |     The request is analyse and related to the config options new request are create to test if
 472 |     specific files/paths/directories exists.
 473 |     ------------------------------------------------------------------------------------------------'''
 474 |     def processHttpMessage(self, toolFlag, messageIsRequest, messageInfo): #IHttpRequestResponse message info
 475 | 
 476 | 
 477 |         #TODO: not from repeater and intruder --> set in ini file too! --> and toolFlag != self._callbacks.TOOL_EXTENDER
 478 | 
 479 |         #This is required to not LOOP Forever as our plugin generate requests!
 480 |         if toolFlag == self._callbacks.TOOL_PROXY and toolFlag != self._callbacks.TOOL_EXTENDER and toolFlag != self._callbacks.TOOL_SCANNER:
 481 | 
 482 |             #Get an Urldata object to use later
 483 |             data = self.getURLdata(messageInfo,messageIsRequest)
 484 | 
 485 |             #VERIFICATION: if URL is in scope we do scan
 486 |             if not self._callbacks.isInScope(data.getUrl()):
 487 |                 #self._callbacks.includeInScope(url)
 488 |                 self._logger.info("URL not in scope: " + data.getUrl().toString())
 489 |                 return
 490 | 
 491 |             if messageIsRequest:
 492 |                 self._logger.debug("Entering: processHttpMessage() REQUEST")
 493 |                 self._logger.debug("Request from domain: "+data.getDomain())
 494 | 
 495 |                 #REJECT specific extension on request
 496 |                 if data.getFileExt() in self._ignoreFileType:
 497 |                     self._logger.info("FILETYPE IGNORED: " + data.getUrl().toString())
 498 |                     return
 499 | 
 500 |                 ###############################################
 501 |                 # Decide which mode to use based on ini config
 502 |                 ###############################################
 503 | 
 504 |                 #from browsed file only
 505 |                 if self._configSmart_Smart:
 506 |                     self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString())
 507 |                     self._logger.debug("Executing: smartRequest()")
 508 |                     thread = threading.Thread(
 509 |                         target=self.smartRequest,
 510 |                         name="Thread-smartRequest",
 511 |                         args=[data],
 512 |                     )
 513 |                     thread.start()
 514 |                     thread.join()
 515 | 
 516 |                 #wordlist adjust with the domain name
 517 |                 elif self._configSmart_Local:
 518 |                     self._logger.debug("Executing: localRequest()")
 519 |                     self.localRequest(data)
 520 | 
 521 |                 #your own wordlist, no smart here
 522 |                 elif self._configSmart_File:
 523 |                     self._logger.debug("Executing: fileRequest()")
 524 |                     self.fileRequest(data)
 525 | 
 526 |                 #spidered items only. Like smart but it browse for you.
 527 |                 elif self._configSmart_Spider:
 528 |                     self._logger.debug("Executing: spiderRequest()")
 529 |                     self.spiderRequest(data)
 530 | 
 531 |             else: #if response
 532 |                 self._logger.debug("Entering: processHttpMessage() RESPONSE")
 533 | 
 534 |                 ###############################################
 535 |                 # Decide which mode to use based on ini config
 536 |                 ###############################################
 537 |                 #VERIFICATION: if URL is in scope we do scan
 538 |                 #if not self._callbacks.isInScope(data.getUrl()):
 539 |                 #    #self._callbacks.includeInScope(url)
 540 |                 #    self._logger.info("URL %s not in scope: " % data.getUrl())
 541 |                 #    return
 542 | 
 543 |                 #from browsed file only
 544 |                 #TODO: sniff JS and CSS file for URLS
 545 |                 #if self._configSmart_Smart:
 546 |                 self._logger.debug("Executing: getUrlInComments()")
 547 |                 thread = threading.Thread(
 548 |                     target=self.getUrlInComments,
 549 |                     name="Thread-getUrlInComments",
 550 |                     args=[data],
 551 |                 )
 552 |                 thread.start()
 553 |                 thread.join()
 554 |         return
 555 | 
 556 |     '''----------------------------------------------------------------------------------------------------------
 557 |     BurpSmartBuster main class (BurpExtender)
 558 |     Only spidering to gather the more page and test those
 559 |     ----------------------------------------------------------------------------------------------------------'''
 560 |     def spiderRequest(self, data):
 561 |         return
 562 | 
 563 |     '''----------------------------------------------------------------------------------------------------------
 564 |     Use BSB files on all visited page
 565 |     ----------------------------------------------------------------------------------------------------------'''
 566 |     def localRequest(self, data):
 567 |         return
 568 | 
 569 |     '''----------------------------------------------------------------------------------------------------------
 570 |     Use user supply file on all visited page
 571 |     ----------------------------------------------------------------------------------------------------------'''
 572 |     def fileRequest(self, data):
 573 |         return
 574 | 
 575 | 
 576 |     '''----------------------------------------------------------------------------------------------------------
 577 |     Use the logic, based on the BSB files and data from the website
 578 |     This is where all the magic happens.
 579 | 
 580 |     We want to :
 581 |     - Call some file extension for the file we browsed to
 582 |         -TODO:  Get a huge list
 583 |             - Extension
 584 |             - User file, windows, linux, osx
 585 |     - Call some path when browsing a new path (even when it is a file)
 586 |         - default path list
 587 |     - Call some files when browsing a new path
 588 |         - user files windows, osx, linux
 589 |         - backup list
 590 |         - autosave list
 591 |         - svn, git list
 592 |         - CMS
 593 |         - Web server, etc.
 594 |     - Get robots.txt and sitemap data
 595 |     - Brute force up to 2 or 3 letters of files names and path on all found path which is not cms/git/etc.
 596 | 
 597 |     - Future version: Parse HTML comments for path
 598 | 
 599 | 
 600 |     - If they exist, we add them to XXX?
 601 |     - If new path exists, let's go recursive (new class?)
 602 |     - If file exists: add to sitemap + verbose + log
 603 | 
 604 |     @param data: UrlData object containing all information about the URL
 605 |     ----------------------------------------------------------------------------------------------------------'''
 606 |     def smartRequest(self,data):
 607 | 
 608 |         #Current request variables
 609 |         domain = data.getDomain()
 610 |         url = data.getUrl()
 611 | 
 612 |         ##################### FETCH DATA ###############################
 613 |         # Gather smart data once before sending requests
 614 |         ################################################################
 615 |         self._logger.debug("Has the Data been gathered for? : "+ str(url))
 616 |         if domain not in self._smartRequestData:
 617 |             try:
 618 |                 self._smartRequestData[domain] = True
 619 |                 self._logger.debug("no")
 620 |                 self._logger.info("Fetching data for: "+ domain)
 621 | 
 622 |                 print "getting data for:" + str(url)
 623 |                 self.getSmartData(data)
 624 | 
 625 |             except Exception as e:
 626 |                 print "exception:"+ e
 627 |                 self._smartRequestData[domain] = False
 628 |                 return False
 629 |         else:
 630 |             self._logger.debug("yes")
 631 | 
 632 |         # Execution of request with the received data:
 633 |         # - spider
 634 |         # - sitemap
 635 |         # - robots
 636 |         # - current directories
 637 |         # - commentsInUrl
 638 |         # json data:
 639 |         # - extension files
 640 |         # - common basic cms files
 641 |         # - common server files
 642 |         # - common user files
 643 |         # - common test files
 644 |         # - common repositories files
 645 |         # -
 646 |         # -
 647 |         '''
 648 |         For the current directories (path)
 649 |         - Test a path/file for a category of path/files
 650 |             - If a tested path/files exist (200/401/403/500) scan other files + - add to sitemap and LOG + add issues?
 651 |             - If not skip it
 652 |             - go 3 deep max and retest all
 653 | 
 654 |         TODO future version:
 655 |         Pseudo algo:
 656 |         Si le present url est un fichier:
 657 |         - Si c'Est un fichier php... tester phps extension.
 658 |         - si c'Est un fichier asmx, tester les wsdl
 659 | 
 660 |         Si c'Est un path:
 661 |         - si ca inclus un path dans sharepoint, tester les sharepoints
 662 |         - si ca inclus un fichier de wordpress ou drupal, tester quelques fichiers cms
 663 |             - Si on trouve un répertoire de type X, effectuer une recherche sur les fichiers de type X dans le repertoire trouvé
 664 |         '''
 665 | 
 666 | 
 667 | 
 668 | 
 669 | 
 670 |         #Current request data
 671 |         baseUrl = data.getBaseUrl()
 672 |         path = data.getPath()
 673 |         filename = data.getFilename()
 674 |         extension = data.getFileExt()
 675 |         print "CURRENT FILE: " + baseUrl + "," + filename + "," + extension
 676 |         #data.json sections: extensions, fileprefix, filesuffix, files, directories
 677 | 
 678 |         #test local file
 679 |         #if current url is a file: test extentions + intelligent details
 680 |         #AND we test current file with prefix and suffix
 681 | 
 682 |         #testing directories
 683 |         #if current URL have some directories test them out
 684 |         #Test them with FILES and DIRECTORIES. Including the current directory (last in path)
 685 | 
 686 |         #with the smart data test robots path and files
 687 |         #test N url from sitemap
 688 |         #in current paths test files and path using domainname and domain without the tld
 689 |         #with filename generated + extensions and path/filenamegenerated
 690 |         '''
 691 |         print "EXTENSIONS"
 692 |         for extension in self._parsed_json["extensions"]:
 693 |             print extension["name"]
 694 |         print "SUFFIX PREFIX"
 695 |         for prefix in self._parsed_json["fileprefix"]:
 696 |             print prefix["name"]
 697 |         for suffix in self._parsed_json["filesuffix"]:
 698 |             print suffix["name"]
 699 | 
 700 |         print "FILES"
 701 |         for files in self._parsed_json["files"]:
 702 |             print files["name"]
 703 |         '''
 704 | 
 705 |         print "DIRECTORIES"
 706 | 
 707 |         #Directories data information
 708 |         directories = data.getDirectories()
 709 |         directory = "/"
 710 |         slash = "" #force slash or not var
 711 | 
 712 |         #get options foir trailing slash. By default it's ON
 713 |         if self._trailingSlash:
 714 |             slash = "/"
 715 | 
 716 |         ##################### EXECUTE DATA.json REQUESTS ###################
 717 |         # Build Request to be execute based on our data.json
 718 |         # and getSmartData results
 719 |         ################################################################
 720 | 
 721 |         #TODO: important put tested directories and files in a dictionnary or array
 722 |         #TODO: important put tested directories and files in a dictionnary or array
 723 |         #TODO: important put tested directories and files in a dictionnary or array
 724 |         #TODO: important put tested directories and files in a dictionnary or arrayà
 725 | 
 726 | 
 727 |         ########################
 728 |         # Technology scanner
 729 |         ########################
 730 |         '''
 731 |         - do a request to root dir
 732 |         - get response (check for redirect)
 733 |         - check headers
 734 |         - check file extensions
 735 |         - depending on results scan X files.
 736 |           - Set current domain technologyVar to X
 737 |         '''
 738 | 
 739 |         ################
 740 |         #Scan the root directory!
 741 |         ################
 742 |         print "DIR: "+str(directories)
 743 | 
 744 |         if not directories:
 745 |             directories = ["/"]
 746 | 
 747 |         # response will be dealed in requestor
 748 |         for dir in directories:
 749 |             print "TESTING: " + dir
 750 |             if dir == "/":
 751 |                 directory = "/"
 752 |             else:
 753 |                 directory = directory+dir+"/" #test all directories: / /a/ /a/b/ /a/b/c/ ...
 754 | 
 755 |             #call our directories inside all request directires
 756 |             for dir2 in self._parsed_json["directories"]:
 757 |                 self.verbose("RequestDir for: "+baseUrl+directory+dir2["name"]+slash)
 758 |                 self._requestor.addRequest(baseUrl+directory+dir2["name"]+slash,data)
 759 | 
 760 |             # call directories based on domain information: url/a/b/c/smartDomain , url/a/b/smartDomain/, etc.
 761 |             #print "SMARTDOMAIN"+self._smartDomain
 762 |             for dir2 in self._smartDomain[domain]:
 763 |                 self.verbose("RequestSmartDomain for: " + baseUrl + directory + dir2)
 764 |                 self._requestor.addRequest(baseUrl + directory + dir2,data)
 765 | 
 766 |                 #in each directory call smartDomain.extensions
 767 |                 for ext in self._parsed_json["extensions"]:
 768 |                     self.verbose("RequestSmartDomain.ext for: " + baseUrl + directory + dir2 + ext["name"])
 769 |                     self._requestor.addRequest(baseUrl + directory + dir2 + ext["name"],data)
 770 | 
 771 |             #call our files in all directories
 772 |             #print "parsed json"+self._parsed_json["files"]
 773 |             for files in self._parsed_json["files"]:
 774 |                 self.verbose("RequestFile for: "+baseUrl+directory+files["name"])
 775 |                 self._requestor.addRequest(baseUrl+directory+files["name"],data)
 776 | 
 777 | 
 778 |         ################
 779 |         #If URL is a file, let's try to add some extension to the file
 780 |         ################
 781 |         if extension:
 782 | 
 783 |             #replace current file extension for our extension
 784 |             tempFilenameUrl = baseUrl+directory+filename
 785 |             tempFilenameUrl1 = baseUrl+directory+filename+"."+extension
 786 |             for ext in self._parsed_json["extensions"]:
 787 |                 self.verbose("RequestExt for: "+ tempFilenameUrl+ext["name"])
 788 |                 self.verbose("RequestFileExt for: "+ tempFilenameUrl1+ext["name"])
 789 |                 self._requestor.addRequest(tempFilenameUrl+ext["name"],data)
 790 |                 self._requestor.addRequest(tempFilenameUrl1+ext["name"],data)
 791 | 
 792 |             #add a prefix to current file
 793 |             tempFilenameUrl = baseUrl+directory
 794 |             for prefix in self._parsed_json["fileprefix"]:
 795 |                 tempFilenameUrl1 = tempFilenameUrl+prefix["name"]+filename+"."+extension
 796 |                 self.verbose("RequestPrefix for: "+tempFilenameUrl1)
 797 |                 self._requestor.addRequest(tempFilenameUrl1,data)
 798 | 
 799 |             #add suffix to current file
 800 |             tempFilenameUrl = baseUrl+directory
 801 |             for suffix in self._parsed_json["filesuffix"]:
 802 |                 tempFilenameUrl1 = tempFilenameUrl+filename+suffix["name"]+"."+extension
 803 |                 self.verbose("RequestSuffix for: "+tempFilenameUrl1)
 804 |                 self._requestor.addRequest(tempFilenameUrl1,data)
 805 | 
 806 | 
 807 | 
 808 |         #make sure we have some data
 809 |         #print "DATA RECEIVED"
 810 |         #print self._words[domain]
 811 |         #print self._mergedWords ##need to call the emrge function if needed
 812 |         #print self._robots[domain]
 813 |         #print str(len(self._sitemap[domain]))
 814 |         #print str(self._urlsInComment[domain])
 815 | 
 816 | 
 817 | 
 818 |         ##################### EXECUTE SMART REQUESTS ###################
 819 |         # Build Request to be execute based on our data.json
 820 |         # and getSmartData results
 821 |         ################################################################
 822 | 
 823 |         #list of smart directories
 824 |         smartDirectories = {}
 825 | 
 826 |         #list of smart files (add our extension to it)
 827 |         smartfiles = {}
 828 | 
 829 |         ################
 830 |         #Request N pages from sitemap
 831 |         ################
 832 |         if domain not in self._siteRobotScanned: #Do it once
 833 |             self._siteRobotScanned[domain] = True #done for this domain
 834 | 
 835 |             tmpSiteMap = []
 836 |             for i in range(0,self._configSpider_NumberOfPages): #get N number of pages from ini config
 837 |                 tmpSiteMap.append(self._sitemap[domain][i])
 838 | 
 839 |             #Requests files and directories from robots.txt
 840 |             tmpRobots = []
 841 |             for line in self._robots[domain]:
 842 | 
 843 |                 #in case robots.txt use ending wildcard we remove it
 844 |                 if line.endswith("*"):
 845 |                     line = line[:-1]
 846 |                 #TODO: Test if directory or file is not 404 ??
 847 |                 tmpRobots.append(baseUrl+line)
 848 | 
 849 |             ################
 850 |             # requests all value for N sitemap url
 851 |             ################
 852 |             for link in tmpSiteMap:
 853 | 
 854 |                 if link.endswith("/"): #scan directories and files
 855 | 
 856 |                     for dir2 in self._parsed_json["directories"]:
 857 |                         self.verbose("RequestSiteMap dir/file for: " + link + dir2["name"] + slash)
 858 |                         self._requestor.addRequest(link + dir2["name"] + slash,data)
 859 | 
 860 |                     for files in self._parsed_json["files"]:
 861 |                         self.verbose("RequestSiteMap dir/file for: " + link + files["name"])
 862 |                         self._requestor.addRequest(link + files["name"],data)
 863 | 
 864 |                 else:  #scan extensions and suffix/prefix
 865 |                     # call our files in all directories
 866 |                     for ext in self._parsed_json["extensions"]:
 867 |                         self.verbose("RequestSitemap file/ext/ext for: " + link + ext["name"])
 868 |                         self._requestor.addRequest(link + ext["name"],data)
 869 | 
 870 |                         #Get the file extension of the current sitemap url to replace the extension
 871 |                         tmpUrl = urlparse(link)
 872 |                         if len(tmpUrl.path.split(".")[-1:]) > 1:
 873 |                             newUrl = ".".join(tmpUrl.path.split(".")[:-1])+ext["name"]
 874 |                             self.verbose("RequestSiteMap file/ext for: " + newUrl)
 875 |                             self._requestor.addRequest(newUrl,data)
 876 | 
 877 |             ################
 878 |             #requests all values for robots path
 879 |             ################
 880 |             for link in tmpRobots:
 881 |                 tmpUrl = baseUrl + link
 882 |                 if link.endswith("/"):  # scan directories and files
 883 |                     for dir2 in self._parsed_json["directories"]:
 884 |                         self.verbose("RequestRobots dir/file for: " + tmpUrl + dir2["name"] + slash)
 885 |                         self._requestor.addRequest(tmpUrl + dir2["name"] + slash,data)
 886 | 
 887 |                     for files in self._parsed_json["files"]:
 888 |                         self.verbose("RequestRobots dir/file for: " + tmpUrl + files["name"])
 889 |                         self._requestor.addRequest(tmpUrl + files["name"],data)
 890 |                 else:
 891 |                     for ext in self._parsed_json["extensions"]:
 892 |                         self.verbose("RequestRobots file/ext/ext for: " + tmpUrl + ext["name"])
 893 |                         self._requestor.addRequest(tmpUrl + ext["name"],data)
 894 | 
 895 |                         #Get the file extension of the current sitemap url to replace the extension
 896 |                         tmpUrl1 = urlparse(link)
 897 |                         if len(tmpUrl1.path.split(".")[-1:]) > 1:
 898 |                             newUrl = ".".join(tmpUrl1.path.split(".")[:-1])+ext["name"]
 899 |                             self.verbose("RequestRobots file/ext for: " + newUrl)
 900 |                             self._requestor.addRequest(newUrl,data)
 901 | 
 902 | 
 903 |         #TODO :  path and words/merge words
 904 | 
 905 |         ################
 906 |         #Request from words
 907 |         ################
 908 |         #print self._words
 909 | 
 910 | 
 911 |         #TODO: loop over: sitemap (done), robots (done), words/mergedwords(fixed for textblob required), bruteforce(later)  Maybe comments data?
 912 |         # - add the data to our stack to request and parse by the Requestor object
 913 |         # - Get current query path and files & Filter out static object from the request (images,etc.)
 914 |         #filter out: gif,jpg,png,css,ico
 915 | 
 916 | 
 917 |         print "Done. Waiting for more URL...!"
 918 | 
 919 |     '''----------------------------------------------------------------------------------------------------------
 920 |     Get the data for smartRequest(), it will fills our list of words which will be our smart logic data to create
 921 |     multiple new HTTP requests. This data should be gather once.
 922 |     ----------------------------------------------------------------------------------------------------------'''
 923 |     #TODO: split some of this works in different functions
 924 |     def getSmartData(self, data):
 925 | 
 926 |         ################################################################
 927 |         # Get the url and its data to create the new smart requests
 928 |         ################################################################
 929 |         urlString = str(data.getUrl()) #cast to cast to stop the TYPEerror on URL()
 930 |         domain = data.getDomain()
 931 |         netloc = data.getNetloc()
 932 |         directories = data.getDirectories()
 933 |         lastDirectory = data.getLastDirectory()
 934 |         params = data.getParams()
 935 |         fileExt = data.getFileExt()
 936 |         completeUrl = data.getCompleteURL()
 937 |         baseUrl = data.getBaseUrl()
 938 | 
 939 |         #Java URL to be used with Burp API
 940 |         url = URL(urlString)
 941 |         self._logger.debug("Current URLString: "+urlString)
 942 |         ######################### SPIDER EXECUTION #####################
 943 |         # Get some words from the web page: do it once!
 944 |         # Note: This step could be threaded using Queue.Queue but there is
 945 |         # little advantage as we need to wait to get all the value anyway
 946 |         ################################################################
 947 | 
 948 |         self._logger.debug("Has the Spider ran for? : "+ domain)
 949 |         if domain not in self._spiderRan: #doing it once
 950 |             self._spiderRan[domain] = True
 951 |             self._logger.debug("No")
 952 | 
 953 |             #self._mergedWords[domain] = {}
 954 |             #self._words[domain] = {}
 955 | 
 956 |             #Start URL, number of page to spider through, request class object to use
 957 | 
 958 |             spider = Spider(data, self._configSpider_NumberOfPages, self._requestor,self._logger)
 959 |             spider.runSpidering()
 960 | 
 961 |             #Get words from the spidering
 962 |             self._words[domain] = spider.getWords()
 963 |             #Get merged words
 964 |             #spider.mergeWords()
 965 |             #self._mergedWords[domain] = spider.getMergedWords()
 966 | 
 967 |             self._logger.debug("Length of Words: "+ str(len(self._words[domain])))
 968 |             #self._logger.debug("Length of MergedWords: "+ str(len(self._mergedWords[domain])))
 969 |             self._logger.info("SPIDER DONE")
 970 |         else:
 971 |             self._logger.debug("Yes")
 972 | 
 973 |         ################################################################
 974 |         # Get robots.txt (once)
 975 |         # Retrieve unique path and files from the robots.txt
 976 |         ################################################################
 977 |         if domain not in self._robots: #do it once
 978 |             print " robot "
 979 | 
 980 |             #get the file
 981 |             queueRobot = Queue.Queue(1)
 982 |             self._logger.info("robot")
 983 |             thread = threading.Thread(
 984 |                 target=self._requestor.runRequest,
 985 |                 name="Thread-Robots",
 986 |                 args=[baseUrl+"/robots.txt", queueRobot],
 987 |             )
 988 |             thread.start()
 989 |             thread.join()
 990 |             response = queueRobot.get()
 991 | 
 992 |             #Parse the file for disallow lines
 993 |             robotList = []
 994 |             for item in response.content.split('\n'):
 995 |                 if item:
 996 |                     i = item.split(':')
 997 |                     if i[0].lower() == "disallow" and i[1] not in robotList:
 998 |                         robotList.append(i[1])
 999 | 
1000 |             #add to domain list
1001 |             self._robots[domain] = robotList
1002 | 
1003 |             self._logger.debug("ROBOT LIST for : " + domain + ":")
1004 |             for item in self._robots[domain]:
1005 |                 self._logger.debug(item)
1006 | 
1007 |             self._logger.info("ROBOTS DONE")
1008 | 
1009 |         else:
1010 |             print "no robot"
1011 |             self._logger.debug("Robots.txt already checked for: " + baseUrl)
1012 | 
1013 |         ################################################################
1014 |         # Get sitemap.xml (once)
1015 |         # test those url for all files/extensions if not in local deque yet
1016 |         ################################################################
1017 |         if domain not in self._sitemap:
1018 |             print " sitemap "
1019 |             queueSitemap = Queue.Queue(1)
1020 |             thread = threading.Thread(
1021 |                 target=self._requestor.runRequest,
1022 |                 name="Thread-Sitemap",
1023 |                 args=[baseUrl+"/sitemap.xml", queueSitemap],
1024 |             )
1025 |             thread.start()
1026 |             thread.join()
1027 | 
1028 |             response = queueSitemap.get()
1029 |             soup = BeautifulSoup(response.content, "html.parser")
1030 | 
1031 |             #Parse the XML TODO: for N instance related to .ini config
1032 |             sitemapList = []
1033 |             for url in soup.findAll("loc"):
1034 |                 sitemapList.append(url.text)
1035 | 
1036 |             self._sitemap[domain] = sitemapList
1037 | 
1038 |             self._logger.debug("Sitemap.xml nbr of items: "+str(len(self._sitemap[domain])))
1039 | 
1040 |             self._logger.info("SITEMAP DONE")
1041 |         else:
1042 |             print "no sitemap"
1043 | 
1044 |         ################################################################
1045 |         # Get domain name relative values
1046 |         # test those names for directory, files with extension
1047 |         ################################################################
1048 |         print "smartDomain"
1049 |         tmpDomValue = []
1050 | 
1051 |         if domain == "localhost":
1052 |             tmpDomValue.append(domain)
1053 |         else:
1054 |             tld = get_tld(urlString, as_object=True)
1055 |             tmpDomValue.append(tld.domain)
1056 |             tmpDomValue.append(tld.tld)
1057 | 
1058 |             if tld.subdomain:
1059 |                 tmpDomValue.append("".join(tld.subdomain+"." + tld.tld))
1060 | 
1061 | 
1062 |         self._smartDomain[domain] = tmpDomValue
1063 | 
1064 |         ######################## BRUTE FORCE DATA ######################
1065 |         # 1, 2 or 3 letters brute force of current directory
1066 |         # Has the current directory been test already? No: do it
1067 |         #brute force function or object?
1068 |         ################################################################
1069 |         #TODO: Later version
1070 |         #charset = "abcdefghijklmnopqrstuvwxyz0123456789_-"
1071 |         #for a in itertools.product(charset,repeat=2):
1072 |         #    sub="".join(a)
1073 | 
1074 | 
1075 |         return True
1076 | 
1077 | 
1078 |     '''----------------------------------------------------------------------------------------------------------
1079 |     Get the information inside response for smartRequest()
1080 |     It will look for URL and email domain inside HTML comments
1081 | 
1082 |     @todo: Optimize the IFs in the comment for loop!
1083 |     ----------------------------------------------------------------------------------------------------------'''
1084 |     def getUrlInComments(self,data):
1085 | 
1086 |         ################### CURRENT DIRECTORIES/FILES ##################
1087 |         # Get current directory(ies)
1088 |         # validate if tested already
1089 |         # If not deal with: test directories and files at currentPath
1090 |         # New class object?
1091 |         ################################################################
1092 |         responseData = data.getResponseData()
1093 | 
1094 |         #TODO: Parse HTML files for comments for Path and file
1095 | 
1096 |         #if you have a response
1097 |         if responseData:
1098 |             soup = BeautifulSoup(responseData, "html.parser")
1099 |             comments=soup.find_all(string=lambda text:isinstance(text,Comment))
1100 |             regUrl = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
1101 |             regEmail = r"[\w\.-]+@[\w\.-]+"
1102 |             urlsInComments = []
1103 |             emailsInComments= []
1104 |             urlsInComment = []
1105 |             emailsInComment = []
1106 | 
1107 |             for comment in comments:
1108 |                 #get urls
1109 |                 urlsComments = re.findall(regUrl,comment)
1110 |                 #parse url, does the domain the same as our current domain?
1111 |                 if urlsComments:
1112 |                     for url in urlsComments[0]:
1113 |                         if url:
1114 |                             #Get URLs
1115 |                             tempData = urlparse(url)
1116 |                             domainInUrlTemp = '{uri.netloc}'.format(uri=tempData).split('.')
1117 |                             domainInUrl = ".".join(domainInUrlTemp)
1118 | 
1119 |                             #TODO: url will need to be verify if in scope when we call it : keep the URL path/file for scan
1120 |                             urlsInComment = re.findall(regUrl,comment)
1121 |                             urlsInComments.append(urlsInComment)
1122 | 
1123 |                     #get emails
1124 |                     emailsInComment = re.findall(regEmail, comment)
1125 |                     emailsInComments.append(emailsInComment)
1126 |             self._logger.debug("url in comments and email in comments:")
1127 | 
1128 |             #get list only
1129 |             if urlsInComments and urlsInComments[0]:
1130 |                 if type(urlsInComment[0]) is tuple:
1131 |                     self._urlsInComment[data.getDomain] = urlsInComment[0]
1132 |             #TODO: use email in another version?
1133 |             if emailsInComments and emailsInComments[0]:
1134 |                 if type(emailsInComments[0]) is tuple:
1135 |                     emailsInComments = emailsInComments[0]
1136 | 
1137 |             self._logger.debug(urlsInComments)
1138 |             self._logger.debug(emailsInComments)
1139 | 
1140 |             self._logger.info("COMMENTS DONE")
1141 | 
1142 | 
1143 |             #TODO: finish these function to gather the information from the data.json
1144 |         '''
1145 |     Function which is accessing smart list of Path to look into by the smart request function
1146 |     '''
1147 |     def getSmartListPath(self):
1148 |         return
1149 | 
1150 |     '''
1151 |     Function which is accessing smart list of file extension to look into by the smart request function
1152 |     '''
1153 |     def getSmartListExt(self):
1154 |         return
1155 | 
1156 |     '''
1157 |     Function which is accessing smart list of directories to look into by the smart request function
1158 |     '''
1159 |     def getSmartDirectories(self):
1160 |         return
1161 | 
1162 |     '''
1163 |     Function which is accessing smart list of files to look into by the smart request function
1164 |     '''
1165 |     def getSmartFiles(self):
1166 |         return
1167 | 
1168 |     '''
1169 |     This functions split all informations of the URL for further use in the smartRequest function
1170 |     @param messageInfo: last request executed with all its information
1171 |     '''
1172 |     def getURLdata(self,messageInfo,messageIsRequest):
1173 | 
1174 |         analyzedRequest = self._helpers.analyzeRequest(messageInfo)
1175 |         url = analyzedRequest.getUrl()
1176 |         self._logger.debug(url)
1177 | 
1178 |         parsed = urlparse(url.toString())
1179 | 
1180 |         '''debug info
1181 |         print 'scheme  :', parsed.scheme
1182 |         print 'netloc  :', parsed.netloc
1183 |         print 'path    :', parsed.path
1184 |         print 'params  :', parsed.params
1185 |         print 'query   :', parsed.query
1186 |         print 'fragment:', parsed.fragment
1187 |         print 'username:', parsed.username
1188 |         print 'password:', parsed.password
1189 |         print 'hostname:', parsed.hostname, '(netloc in lower case)'
1190 |         print 'port    :', parsed.port
1191 |         '''
1192 | 
1193 |         #Is there any parameters?
1194 |         params = analyzedRequest.getParameters()
1195 | 
1196 |         for p in params:
1197 |             self._logger.debug("Query var: "+p.getName())
1198 |             self._logger.debug("Query value: "+p.getValue())
1199 | 
1200 |         #getURL, needs to be a string before parsing it with urlparse
1201 |         completeURL = url.toString()
1202 |         self._logger.debug("Complete URL: "+completeURL)
1203 | 
1204 |         #URL sans port/dir/params
1205 |         baseURL = messageInfo.getHttpService().toString()
1206 |         self._logger.debug("Base URL: "+baseURL)
1207 | 
1208 | 
1209 |         #Get path including directories and file extension
1210 |         path = urlparse(completeURL).path.encode("utf-8")
1211 |         filename = path.split('/')[-1:].pop().split('.')[:1].pop()
1212 |         fileExt = path.split('.')[1:]
1213 |         fileExt = "".join(fileExt)
1214 |         directories = path.split('/')[1:-1]
1215 |         directory = "/".join(directories)
1216 |         if len(fileExt) > 0:
1217 |             self._logger.debug("Directories: "+str(directories)[1:-1])
1218 |             self._logger.debug("Directory: "+directory)
1219 |             self._logger.debug("File Extension: "+fileExt)
1220 |             self._logger.debug("URL Path: "+path)
1221 |             self._logger.debug("Filename: "+filename)
1222 |         else:
1223 |             self._logger.debug("No file Extension, directory is: "+path)
1224 | 
1225 |         #Get domain and netloc
1226 |         netloc = parsed.netloc.encode("utf-8")
1227 |         domain = netloc.split(':')[0]
1228 | 
1229 |         self._logger.debug("Domain/: "+domain)
1230 | 
1231 |         '''
1232 |         print "Complete URL: "+completeURL
1233 |         print "Domain: "+domain
1234 |         print "Netloc: "+ netloc
1235 |         print "Query value: "+p.getValue()
1236 |         print "Query var: "+p.getName()
1237 |         print "Directories: "+str(directories)[1:-1]
1238 |         print "Directories2: "+str(directories)
1239 |         print "Directory: "+directory
1240 |         print "File Extension: "+fileExt
1241 |         print "URL Path: "+path
1242 |         print "Filename: "+filename
1243 |         print "Base URL: "+baseURL
1244 |         '''
1245 | 
1246 |         responseData = ""
1247 |         if not messageIsRequest: #when it's a response, get the response data
1248 |             content = messageInfo.getResponse()
1249 |             response = self._helpers.analyzeResponse(content)
1250 |             responseData = self._helpers.bytesToString(content[response.getBodyOffset():])
1251 | 
1252 |             #data = UrlData("",headers,"","","","","","",responseData,self._logger)
1253 | 
1254 |         data = UrlData(url,domain,netloc,directories,params,filename,fileExt,baseURL,completeURL,path,responseData,self._logger)
1255 |         return data
1256 | 
1257 |     # This method is called when multiple issues are reported for the same URL
1258 |     # In this case we are checking if the issue detail is different, as the
1259 |     # issues from our scans include affected parameters/values in the detail,
1260 |     # which we will want to report as unique issue instances
1261 |     def consolidateDuplicateIssues(self, existingIssue, newIssue):
1262 |         if (existingIssue.getIssueDetail() == newIssue.getIssueDetail()):
1263 |             return -1
1264 |         else:
1265 |             return 0
1266 | 
1267 |     #Have to be implemented
1268 |     def doPassiveScan(self, baseRequestResponse):
1269 |         pass
1270 | 
1271 |     #Have to be implemented
1272 |     def doActiveScan(self, baseRequestResponse, insertionPoint):
1273 |         pass
1274 | '''
1275 | Multithreaded class to execute queries out of the Queue.Queue
1276 | 
1277 | Also get the response and validate the 404 type
1278 | '''
1279 | class RequestorWorker(threading.Thread):
1280 | 
1281 |     def __init__(self, threadID, name, queue, error404, logger, requestor, UI, recursiveURLs):
1282 | 
1283 |         #Sahred Queue between Thread Workers
1284 |         self._id = threadID
1285 |         self._name = name
1286 |         self._queue = queue #request queue received from the Requestor
1287 |         self._threadLock = threading.Lock()
1288 |         self._alive = True
1289 |         threading.Thread.__init__(self)
1290 |         self.daemon = True
1291 |         #self._responseQueue = responseQueue
1292 |         self._error404 = error404
1293 |         self._logger = logger
1294 |         self._requestor = requestor
1295 |         self._ui = UI
1296 |         self._recursiveURLs = recursiveURLs
1297 | 
1298 |         self._acceptedCode = (200,400,401,403,500)
1299 | 
1300 |         #TODO: Set a randomizer of user-agent and add the option in .ini file
1301 |         self._headers = {
1302 |             'User-Agent': 'Mozilla/5.0'
1303 |         }
1304 | 
1305 |         return
1306 | 
1307 |     '''
1308 |     Return type of 404 for requested domain
1309 | 
1310 |     @param domain: domain to fetch error 404 type
1311 |     '''
1312 |     def _getError404(self,url):
1313 |         #Get domain and netloc
1314 |         parsed = urlparse(url)
1315 |         netloc = parsed.netloc.encode("utf-8")
1316 |         domain = netloc.split(':')[0]
1317 |         return self._error404[domain]
1318 | 
1319 |     def run(self):
1320 |         while(self._alive):
1321 |             #waiting for queue
1322 |             #print "Waiting for queue: "+self._name
1323 |             url = self._queue.get()
1324 | 
1325 |             #print "TASK RECEIVED: " + url + " From: " + self._name
1326 | 
1327 |             self._logger.debug(self._name+" requesting(URL): " + url)
1328 |             self._logger.info(self._name+" requesting(URL): " + url)
1329 |             #print "[Requesting] " + url
1330 | 
1331 |             #TODO: randomizedUserAgent
1332 |             #TODO: - 302 (redirect) --> parse the redirect URL (in scope ok, in sitemap stop, not in site map  add to queue : 200+window.location or JS isn't catch yet
1333 | 
1334 |             response = requests.get(url, headers=self._headers, allow_redirects=False)
1335 | 
1336 |             if response.status_code in self._acceptedCode:
1337 |                 #add no false positive to site map
1338 |                 code = self._getError404(url)
1339 |                 print "[URL EXISTS](Response: " +str(response.status_code)+ ") | 404 type:" + str(code) +" | FOR URL: "+ str(url)
1340 | 
1341 |                 #False positive logic.
1342 |                 #TODO: can be update or upgraded for sure! :)
1343 |                 fp = ""
1344 | 
1345 | 
1346 |                 '''
1347 |                 si 404
1348 |                     si response 200 ok
1349 |                     si response 401
1350 |                     si response 403
1351 |                     si response 300
1352 |                     si response 500
1353 |                 si 403
1354 |                     si response 200
1355 |                     si response 401 fp
1356 |                     si response 403 fp
1357 |                     si response 300
1358 |                     si response 500 fp
1359 |                 si 500
1360 |                     si response 200
1361 |                     si response 401
1362 |                     si response 403
1363 |                     si response 300
1364 |                     si response 500 fp
1365 |                 si intext
1366 |                     si response 200 need reverification fp
1367 |                     si response 401
1368 |                     si response 403
1369 |                     si response 300
1370 |                     si response 500
1371 |                 si 300
1372 |                     si response 200
1373 |                     si response 401
1374 |                     si response 403
1375 |                     si response 300 fp
1376 |                     si response 500
1377 |                 '''
1378 | 
1379 |                 #if the current request is a 403 and the 404 page isn't a 403 page, should be false positive
1380 |                 if response.status_code == 403 and code != 403:
1381 |                     fp = " ,False Positive"
1382 |                 #if current response is a 200 and the 404 page was inside a 200 code page, it can be a false positive
1383 |                 elif response.status_code == 200 and code == "404 in page":
1384 |                     fp = " ,False Positive"
1385 |                 #if 404 page is inside a 200 response code, a 300 redirect page or a 403, many possible false positive
1386 |                 elif code == "404 in page" or code == 300 or code == 403:
1387 |                     fp = " ,Possible False Positive"
1388 |                 #code is 200 or whatnot
1389 |                 else: #TODO: define all directory in a list and add to the recursive list+validate latest directory of current url to see if it is in list, if not add it
1390 |                     print 200
1391 |                     #if it's a direct directory, let's recurse... if not recurse too much already!
1392 |                     #if urlparse(url).path[-1] == '/' and self._recursiveURLs.get(str(url), 0) <= self._ui.getRecursiveConfig():
1393 |                     #    self._recursiveURLs[str(url)] = self._recursiveURLs.get(str(url), 0) + 1 #adjust the recursed level for that directory
1394 |                     #    self._requestor.runRequest(url,Queue.Queue(1))
1395 | 
1396 |                 #add code to the Jlist here
1397 |                 print url
1398 |                 self._ui.addURL(url + " , ("+str(response.status_code)+")" + fp)
1399 | 
1400 | 
1401 | 
1402 |                 #TODO: add page to SiteMap if not there already?
1403 | 
1404 |                 #TODO: issue = SmartBusterIssue()
1405 |                 #might need to parse the url into data for the issue?
1406 |                 #issue=ScanIssue(baseRequestResponse.getHttpService(), self._helpers.analyzeRequest(baseRequestResponse).getUrl(), httpmsgs, ISSUE_NAME, ISSUE_DETAIL, SEVERITY, CONFIDENCE, REMEDIATION_DETAIL, ISSUE_BACKGROUND, REMEDIATION_BACKGROUND)
1407 |                 #self._callbacks.addScanIssue(issue)
1408 | 
1409 | '''----------------------------------------------------------------------------------------------------------------------------------------
1410 | Class to hold the Request data
1411 | 
1412 | - Using Requests API we use a Queue to append HTTP requests to be executed.
1413 | - If the requests return a 200/401/403/500 we add them to the sitemap and add them to our list of URL/Dir/file found
1414 | - Can save data found to csv
1415 | ----------------------------------------------------------------------------------------------------------------------------------------'''
1416 | class Requestor():
1417 |     '''
1418 |     Initialize
1419 | 
1420 |     '''
1421 |     def __init__(self,logger,UI):
1422 | 
1423 |         #Queue to hold URL to request
1424 |         #Each item will be a URL string str(URL)
1425 |         self._requestQueue = Queue.Queue(0)
1426 |         self._logger = logger
1427 | 
1428 |         #hold type of 404 error by domain
1429 |         self._error404 = {}
1430 | 
1431 |         #hold url that are being recursive
1432 |         self._recursiveURLs = []
1433 | 
1434 |         #Queue to hold URL and their response code
1435 |         #Each item will be a list (url,code)
1436 |         #self._responseQueue = deque()
1437 | 
1438 |          #TODO: Set a randomizer of user-agent and add the option in .ini file
1439 |         self._headers = {
1440 |             'User-Agent': 'Mozilla/5.0'
1441 |         }
1442 | 
1443 |         self._logger.debug("Requestor object created")
1444 | 
1445 |         threads = [] #list containing threads
1446 | 
1447 |         #1 thread needed for infofestival. Don't know how to split the pages between workers
1448 |         for i in range(0,40):#TODO: Set a number of thread in UI
1449 |             t = RequestorWorker(i,"RequestorWorker-"+str(i),self._requestQueue,self._error404, logger, self, UI, self._recursiveURLs)
1450 |             threads.append(t)
1451 |             t.start()
1452 | 
1453 |         return
1454 | 
1455 | 
1456 |     '''
1457 |     Add a request to the queue to be execute by a thread worker (RequestorWorker)
1458 | 
1459 |     @param url: the URL to get a response from
1460 |     '''
1461 |     def addRequest(self,url,data):
1462 | 
1463 | 
1464 |         #print "ADDING: "+ url
1465 | 
1466 |         #get the 404 details for the current domain
1467 |         self._define404(data)
1468 |         self._requestQueue.put(url) ##see if we can put the type404 inside the queue along with the url
1469 |         return
1470 | 
1471 |     '''
1472 |     Define 404 type of the current domain
1473 |     '''
1474 |     def _define404(self,data):
1475 | 
1476 |         domain = data.getDomain()
1477 |         #only do once per domain
1478 |         if domain not in self._error404:
1479 | 
1480 |             code = 404
1481 |             errorQueue = Queue.Queue(0)
1482 | 
1483 |             #get a 404 page
1484 |             m = hashlib.md5()
1485 |             m.update(str(random.random()))
1486 | 
1487 |             url = data.getBaseUrl()+"/"+m.hexdigest()
1488 |             print url
1489 |             self.runRequest(url,errorQueue)
1490 |             response = errorQueue.get()
1491 | 
1492 |             #if website use standard 404 error, everything is good
1493 |             if response.status_code == 404:
1494 |                 code = 404
1495 | 
1496 |             #if website used a 3xx code
1497 |             if 310 - response.status_code < 11 and 310 - response.status_code > 0:
1498 |                 code = 300
1499 | 
1500 |             if response.status_code == 403:
1501 |                 code = 403
1502 | 
1503 |             #if website use a 5xx code
1504 |             if 510 - response.status_code < 11 and 510 - response.status_code > 0:
1505 |                 code = 500
1506 | 
1507 |             #if website use a 200
1508 |             if response.status_code == 200:
1509 | 
1510 |                 soup = BeautifulSoup(response.content, "html.parser")
1511 | 
1512 |                 ################################
1513 |                 #TODO: more use case to add
1514 |                 ################################
1515 |                 if soup.findAll(text=re.compile("page not found")):
1516 |                     code = "404 in page"
1517 |                 elif soup.findAll(text=re.compile("404")):
1518 |                     code = "404 in page"
1519 |                 elif soup.findAll(text=re.compile("page does not exist")):
1520 |                     code = "404 in page"
1521 |                 elif soup.findAll(text=re.compile("error 404")):
1522 |                     code = "404 in page"
1523 | 
1524 |             #define which code is refer to a 404
1525 |             self._error404[domain] = code
1526 | 
1527 |         return
1528 | 
1529 |     '''
1530 |     Run a NON DELAYED (no thread workers) request and save the url:response code to the response deque class variable
1531 | 
1532 |     @param url: the URL to request and get a response
1533 |     @param responseQueue: thread safe queue to send the response back to the spider or other objects
1534 |     '''
1535 |     def runRequest(self,url,responseQueue):
1536 | 
1537 |         #TODO: After thread is done, in thread read the _requestQeue object
1538 | 
1539 |         self._logger.debug("runRequest(URL): "+url)
1540 |         self._logger.info("EXECUTING REQUEST FOR: "+url)
1541 |         response = requests.get(url,  headers=self._headers, allow_redirects=False)
1542 |         responseQueue.put(response)
1543 | 
1544 |         #TODO: Get code
1545 |         #TODO: add page to SiteMap if not there already?
1546 | 
1547 | 
1548 |         self._logger.debug("runRequest done  for: "+url)
1549 | 
1550 |         return
1551 | 
1552 |     #TODO randomizedUserAgent
1553 |     def randomizedUserAgent(self):
1554 |         return
1555 | 
1556 | 
1557 | 
1558 | 
1559 | '''----------------------------------------------------------------------------------------------------------------------------------------
1560 | Class to hold the Spidering data
1561 | 
1562 | - Based on: http://www.netinstructions.com/how-to-make-a-web-crawler-in-under-50-lines-of-python-code/
1563 |   Uses BeautifulSoup, require to download/install it.
1564 | ----------------------------------------------------------------------------------------------------------------------------------------'''
1565 | class Spider():
1566 | 
1567 |     '''
1568 |     Initialize
1569 | 
1570 |     @param startUrl: the URL to start the spidering
1571 | 
1572 |     '''
1573 |     def __init__(self, data, maxPages, requestObj, logger):
1574 |         self._data = data
1575 |         self._words = []
1576 |         self._mergedWords = []
1577 |         self._maxPages = int(maxPages)
1578 |         self._requestor = requestObj
1579 |         self._queue = Queue.Queue(self._maxPages)
1580 |         self._domain = data.getDomain()
1581 |         self._logger = logger
1582 |         self._logger.debug("Spider object created")
1583 | 
1584 |     '''
1585 |     Run the spidering
1586 | 
1587 |     @return: list of all words found
1588 |     @todo: use TextBlob for other language, right now mostly only english based words will be categorized correctly.
1589 |     '''
1590 |     def runSpidering(self):
1591 | 
1592 |         urlString = str(self._data.getUrl())
1593 |         url = URL(urlString)
1594 | 
1595 |         print "Spider, URL: " + urlString
1596 |         #Get the words from the URL, starting with the startUrl
1597 |         link_list = [urlString]
1598 | 
1599 |         #Counter
1600 |         pagesVisited = 0
1601 | 
1602 |         self._logger.debug("Max pages to visit: " + str(self._maxPages))
1603 | 
1604 |         while int(pagesVisited) < int(self._maxPages):
1605 |             self._logger.debug("Nbr Page Visited: " + str(pagesVisited) + " / " + str(self._maxPages))
1606 |             self._logger.debug("Visiting: " + link_list[pagesVisited])
1607 |             visitingUrl = link_list[pagesVisited]
1608 |             pagesVisited = pagesVisited+1
1609 |             print "Visiting URL: "+visitingUrl
1610 |             try:
1611 |                 #??? Fix the url retrieve.
1612 |                 #If it starts with / we add the domain to it
1613 |                 if self._domain not in visitingUrl:
1614 |                     if visitingUrl.startswith("/"):
1615 |                         visitingUrl = visitingUrl[1:]
1616 |                         #TODO: startswith /#
1617 | 
1618 |                     link_list[pagesVisited] = self._data.getCompleteURL() + visitingUrl
1619 |                     visitingUrl = link_list[pagesVisited]
1620 | 
1621 |                 #send an asynchronus HTTP request and wait for the response
1622 |                 thread = threading.Thread(
1623 |                                 target=self._requestor.runRequest,
1624 |                                 name="Thread-Spider",
1625 |                                 args=[visitingUrl, self._queue],
1626 |                                 )
1627 |                 thread.start()
1628 |                 thread.join()
1629 |                 response = self._queue.get()
1630 |                 self._logger.debug("Response received from: "+visitingUrl)
1631 | 
1632 |                 #Get the soup
1633 |                 soup = BeautifulSoup(response.content, "html.parser")
1634 | 
1635 |                 #Get the visible text
1636 |                 [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])]
1637 |                 visible_texts = soup.getText()#.encode('utf-8').strip()
1638 |                 #Get the text blob
1639 |                 blob = TextBlob(visible_texts)
1640 | 
1641 |                 #Get the words : TODO: add the 1000 value in the bsb.ini?
1642 |                 if len(blob.words) <= 1000: #merging 2 words and up to 1000  (cpu intensivity)
1643 |                     for words,tag in blob.tags:
1644 |                         #Get only noun and numbers
1645 |                         if tag.startswith("NN") or tag == "CD":
1646 |                             self._words.append(words)
1647 | 
1648 |                 self._logger.debug("Size of WORDS: " + str(len(self._words)))
1649 | 
1650 |                 #Get the links for next pages or stop
1651 |                 aSoup = soup.findAll("a")
1652 |                 if len(aSoup) > 0:
1653 |                     for i in aSoup:
1654 |                         #Do not use previous page, index or anchors
1655 |                         if not i['href'].startswith("#") and not i['href'] == "/" and not i['href'] in i and not i['href'].startswith("/#") and not i['href'].startswith("//"):
1656 |                             link_list.append(i['href'])
1657 |                 else:
1658 |                     self._logger.debug("No words on: "+visitingUrl)
1659 |                     break
1660 | 
1661 |             except KeyError:
1662 |                 self._logger.error("SpiderError: KeyError")
1663 |                 pass
1664 |             except requests.exceptions.RequestException as e:
1665 |                 self._logger.error("SpiderError: "+e.reason)
1666 |                 pass
1667 | 
1668 |         return self._words
1669 | 
1670 |     '''
1671 |     Merge the obtained words from the spidering
1672 | 
1673 |     @return: List of all words mixed with each others
1674 |     '''
1675 |     def mergeWords(self):
1676 |         if len(self._words) > 1:
1677 | 
1678 |             #original list of words that we want to mix
1679 |             listOriginal = self._words
1680 | 
1681 |             #merging all words together
1682 |             for words in listOriginal:
1683 |                 for wordsToMerge in listOriginal:
1684 |                     self._mergedWords.append(words+wordsToMerge)
1685 | 
1686 |             return True
1687 |         else:
1688 |             return False
1689 | 
1690 | 
1691 |     '''
1692 |     @return: List of all words mixed with each others
1693 |     Note: The return words needs to be convert to utf-8
1694 |     '''
1695 |     def getMergedWords(self):
1696 |          return self._mergedWords
1697 | 
1698 |     '''
1699 |     @return: List of all words
1700 |     Note: The return words needs to be convert to utf-8
1701 |     '''
1702 |     def getWords(self):
1703 |          return self._words
1704 | 
1705 | 
1706 | '''----------------------------------------------------------------------------------------------------------------------------------------
1707 | Class to share community data to annonimized server
1708 | ----------------------------------------------------------------------------------------------------------------------------------------'''
1709 | class technologyScanner():
1710 | 
1711 |     def __init__(self, optIn, logger):
1712 |         self._optIn = optIn
1713 |         self._logger = logger
1714 | 
1715 |         self._logger.debug("CommunityData Object Created")
1716 | 
1717 |         return
1718 | 
1719 | '''----------------------------------------------------------------------------------------------------------------------------------------
1720 | Class to share community data to annonimized server
1721 | ----------------------------------------------------------------------------------------------------------------------------------------'''
1722 | class communityData():
1723 | 
1724 |     def __init__(self, optIn, logger):
1725 |         self._optIn = optIn
1726 |         self._logger = logger
1727 | 
1728 |         self._logger.debug("CommunityData Object Created")
1729 | 
1730 |         return
1731 | 
1732 |     def submitData(self,fileName,isFile):
1733 |         if self._optIn:
1734 | 
1735 |             #prepare the request to submit to the server
1736 |             if isFile:
1737 |                 print "Data is a file"
1738 |                 #data to sent is a file
1739 |             else:
1740 |                 print "data is a directory"
1741 |                 #data to sent is a directory
1742 | 
1743 |             #contact the server
1744 |             print "contacting the server with data: " + fileName
1745 |         return
1746 | 
1747 | '''----------------------------------------------------------------------------------------------------------------------------------------
1748 | Class to hold the URL data in separated parts
1749 | ----------------------------------------------------------------------------------------------------------------------------------------'''
1750 | class UrlData():
1751 | 
1752 |     def __init__(self,url,domain,netloc,directories,params,filename, fileExt,baseURL,completeURL,path,responseData,logger):
1753 |         self._url = url
1754 |         self._domain = domain
1755 |         self._netloc = netloc
1756 |         self._directories = directories
1757 |         self._params = params
1758 |         self._fileExt = fileExt
1759 |         self._baseURL = baseURL
1760 |         self._completeURL = completeURL
1761 |         self._responseData = responseData
1762 |         self._logger = logger
1763 |         self._path = path
1764 |         self._filename = filename
1765 | 
1766 |         self._logger.debug("UrlData object created")
1767 |         return
1768 | 
1769 |     def getPath(self):
1770 |         return self._path
1771 | 
1772 |     def getFilename(self):
1773 |         return self._filename
1774 | 
1775 |     def getResponseHeaders(self):
1776 |         if not self._url:
1777 |             return self._domain
1778 | 
1779 |     def getResponseData(self):
1780 |         return self._responseData
1781 | 
1782 |     def getBaseUrl(self):
1783 |         return self._baseURL
1784 | 
1785 |     def getCompleteURL(self):
1786 |         return self._completeURL
1787 | 
1788 |     def getUrl(self):
1789 |         return self._url
1790 | 
1791 |     def getDomain(self):
1792 |         return self._domain
1793 | 
1794 |     def getNetloc(self):
1795 |         return self._netloc
1796 | 
1797 |     def getDirectories(self):
1798 |         return self._directories
1799 | 
1800 |     def getLastDirectory(self):
1801 |         if len(self._directories) > 0:
1802 |             return self._directories[len(self._directories)-1]
1803 |         else:
1804 |             return ""
1805 | 
1806 |     def getParams(self):
1807 |         return self._params
1808 | 
1809 |     def getFileExt(self):
1810 |         return self._fileExt
1811 | '--------------------------------------------------------------------'
1812 | 
1813 | 
1814 | 
1815 | '''--------------------------------------------------------------------
1816 | Class to hold the Issues found
1817 | @TODO: see for Sitemap instead of issue or WITh issues
1818 | --------------------------------------------------------------------'''
1819 | class SmartBusterIssue(IScanIssue):
1820 |   '''This is our custom IScanIssue class implementation.'''
1821 |   def __init__(self, httpService, url, httpMessages, issueName, issueDetail, severity, confidence, remediationDetail, issueBackground, remediationBackground):
1822 |       self._issueName = issueName
1823 |       self._httpService = httpService
1824 |       self._url = url
1825 |       self._httpMessages = httpMessages
1826 |       self._issueDetail = issueDetail
1827 |       self._severity = severity
1828 |       self._confidence = confidence
1829 |       self._remediationDetail = remediationDetail
1830 |       self._issueBackground = issueBackground
1831 |       self._remediationBackground = remediationBackground
1832 | 
1833 | 
1834 |   def getConfidence(self):
1835 |       return self._confidence
1836 | 
1837 |   def getHttpMessages(self):
1838 |       return self._httpMessages
1839 |       #return None
1840 | 
1841 |   def getHttpService(self):
1842 |       return self._httpService
1843 | 
1844 |   def getIssueBackground(self):
1845 |       return self._issueBackground
1846 | 
1847 |   def getIssueDetail(self):
1848 |       return self._issueDetail
1849 | 
1850 |   def getIssueName(self):
1851 |       return self._issueName
1852 | 
1853 |   def getIssueType(self):
1854 |       return 0
1855 | 
1856 |   def getRemediationBackground(self):
1857 |       return self._remediationBackground
1858 | 
1859 |   def getRemediationDetail(self):
1860 |       return self._remediationDetail
1861 | 
1862 |   def getSeverity(self):
1863 |       return self._severity
1864 | 
1865 |   def getUrl(self):
1866 |       return self._url
1867 | 
1868 |   def getHost(self):
1869 |       return 'localhost'
1870 | 
1871 |   def getPort(self):
1872 |       return int(80)
1873 | 


--------------------------------------------------------------------------------
/DemoLabs - BurpSmartBuster - DEF CON 2016.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathetiq/BurpSmartBuster/2a5998184bae006a16620930f0b9ef3e2cfc9b2e/DemoLabs - BurpSmartBuster - DEF CON 2016.pdf


--------------------------------------------------------------------------------
/DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pathetiq/BurpSmartBuster/2a5998184bae006a16620930f0b9ef3e2cfc9b2e/DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Patrick Mathieu (@PathetiQ)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Unsupported anymore
 2 | This application is unsupported anymore, but a new "version" is coming in the next few months! :)
 3 | 
 4 | ![BSB](http://i.imgur.com/rHToHhe.png)
 5 | 
 6 | # BurpSmartBuster
 7 | 
 8 | A Burp Suite content discovery plugin that add the smart into the Buster!
 9 | 
10 | ## Installation
11 | 
12 | - Now inside Burp Suite Store
13 | - Or [See Wiki page](https://github.com/pathetiq/BurpSmartBuster/wiki) for manual installation
14 | 
15 | 
16 | # Features
17 | 
18 | * Looks for files, directories and file extensions based on current requests received by Burp Suite
19 | * Checks for:
20 |   * Directories in the current URL directories
21 |   * Files in the current URL directories 
22 |   * Replace and add extension to current files
23 |   * Add suffix and prefix to current files
24 | * Easy and documented code
25 | * Verbose and logging 
26 | 
27 | # Todos
28 | 
29 | * In progress: Technological and environment checks (PHP, IIS, Apache, SharePoint, etc.)
30 | * In progress: Community data
31 | * Limit Thread speed
32 | * Use the spidering results for actual brute forcing
33 | 
34 | ## Presentations and release date
35 | 
36 | * BSB was released on August 6th 2016 at [DEF CON 24 Demolabs](https://defcon.org/html/defcon-24/dc-24-demolabs.html) in the Grand Salon. 
37 | * An updated talk about the tool and it's future has been done at [Derbycon 6.0](https://www.youtube.com/watch?v=RFxUfoVgMrw).
38 | * French version of the talk given at [Hackfest.ca 2016](https://www.youtube.com/watch?v=yIC9zmKLoxg)
39 | 
40 | ## Code workflow and options
41 | 
42 | See the Presentation PDF.
43 | 
44 | More information to come in the wiki.
45 | 


--------------------------------------------------------------------------------
/bsb.ini:
--------------------------------------------------------------------------------
 1 | ######################################
 2 | # BurpSmartBuster Config File
 3 | ######################################
 4 | 
 5 | 
 6 | #----------------------------------------------------------------------------------
 7 | # Define the number of request executed when a URL is browse from the user in Burp
 8 | #
 9 | # Recommended number of test is: 
10 | # The greater the number, the less stealth and fast it is
11 | #----------------------------------------------------------------------------------
12 | [NumberOfTests]
13 | Paths: 5
14 | Files: 5
15 | Extensions: 5
16 | Directories: 5
17 | 
18 | #----------------------------------------------------------------------------------
19 | # RecursiveDirs: The number of directories to test when accessing a deep down directory from a browsed URL  
20 | # Ex: http://site.com/a/b/c/file.php will be "3" and test will occurs on /a/ /a/b/ and /a/b/c/
21 | #
22 | # NumberOfPages: The number of page the Spider need to gather words from it for dynamic/smart analysis with the "smart" buster options.
23 | #                Do not mix this setting for the recursive number of deepness.
24 | #----------------------------------------------------------------------------------
25 | [Spider]
26 | RecursiveDirs: 3
27 | NumberOfPages: 5
28 | 
29 | #----------------------------------------------------------------------------------
30 | # File to use for testing directories
31 | #
32 | # local : Means it will use BsB files with context logic (on, off)
33 | # Smart : Will only use the current (browsed files) filename and directory names to test for basic predefine extension/file/dir (on, off) and data from the website
34 | # File : Push your own file (Ex: /usr/share/wordlist/list.lst) , no logic here
35 | # Spider: Use the spider to gather all info, like smart but it browse for you
36 | #----------------------------------------------------------------------------------
37 | [Smart]
38 | Local: off
39 | Smart: on
40 | File: off
41 | Spider: off
42 | verbose = off
43 | 
44 | #----------------------------------------------------------------------------------
45 | # InScope items
46 | # IMPORTANT Not using in scope only items will trigger the spidering and multiples request on website that are not in your scope!
47 | #----------------------------------------------------------------------------------
48 | [InScope]
49 | ScopeOnly: on
50 | 
51 | #----------------------------------------------------------------------------------
52 | # File extension to ignore inside the plugin
53 | # TODO: implement this
54 | #----------------------------------------------------------------------------------
55 | [Ignore]
56 | FileType: gif,jpg,png,css,js,ico,woff
57 | 
58 | [Technical]
59 | TrailingSlash: on
60 | 


--------------------------------------------------------------------------------
/data.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "extensions":[
  3 |     {"name":".doc", "description":"downloadable", "type":"default"},
  4 |     {"name":".docx", "description":"downloadable", "type":"default"},
  5 |     {"name":".xls", "description":"downloadable", "type":"default"},
  6 |     {"name":".xlsx", "description":"downloadable", "type":"default"},
  7 | 
  8 |     {"name":".zip", "description":"compress", "type":"default"},
  9 |     {"name":".rar", "description":"compress", "type":"default"},
 10 |     {"name":".tar.gz", "description":"compress", "type":"default"},
 11 |     {"name":".tar", "description":"compress", "type":"default"},
 12 |     {"name":".bak.zip", "description":"compress", "type":"default"},
 13 |     {"name":".bak.gz", "description":"compress", "type":"default"},
 14 |     {"name":".bak.z", "description":"compress", "type":"default"},
 15 |     {"name":".tar.gz", "description":"compress", "type":"default"},
 16 |     {"name":".tgz", "description":"compress", "type":"default"},
 17 |     {"name":".7z", "description":"compress", "type":"default"},
 18 | 
 19 |     {"name":".bak", "description":"backup", "type":"default"},
 20 |     {"name":".backup", "description":"backup", "type":"default"},
 21 |     {"name":".bk", "description":"backup", "type":"default"},
 22 |     {"name":".bac", "description":"backup", "type":"default"},
 23 |     {"name":".asd", "description":"backup", "type":"default"},
 24 |     {"name":".dsa", "description":"backup", "type":"default"},
 25 | 
 26 |     {"name":"~", "description":"autosave", "type":"default"},
 27 |     {"name":".swp", "description":"autosave", "type":"default"},
 28 |     {"name":".swo", "description":"autosave", "type":"default"},
 29 |     {"name":".save", "description":"autosave", "type":"default"},
 30 |     {"name":".asd", "description":"autosave", "type":"default"},
 31 |     {"name":".autosave", "description":"autosave", "type":"default"},
 32 |     {"name":"._", "description":"autosave", "type":"default"},
 33 | 
 34 |     {"name":".sql", "description":"database", "type":"default"},
 35 |     {"name":".sql.zip", "description":"database", "type":"default"},
 36 |     {"name":".sql.gz", "description":"database", "type":"default"},
 37 |     {"name":".sql.tar.gz", "description":"database", "type":"default"},
 38 |     {"name":".sql.tar", "description":"database", "type":"default"},
 39 |     {"name":".sql.rar", "description":"database", "type":"default"},
 40 |     {"name":".db", "description":"database", "type":"default"},
 41 |     {"name":".bd", "description":"database", "type":"default"},
 42 |     {"name":".diff", "description":"database", "type":"default"},
 43 |     {"name":".trn", "description":"database", "type":"default"},
 44 |     {"name":".bd", "description":"database", "type":"default"},
 45 | 
 46 |     {"name":"._Old", "description":"old", "type":"default"},
 47 |     {"name":"._old", "description":"old", "type":"default"},
 48 |     {"name":".old", "description":"old", "type":"default"},
 49 | 
 50 |     {"name":".dmp", "description":"random", "type":"default"},
 51 |     {"name":".sh", "description":"random", "type":"default"},
 52 | 
 53 |     {"name":".conf", "description":"configuration", "type":"default"},
 54 |     {"name":".ini", "description":"configuration", "type":"default"},
 55 |     {"name":".pem", "description":"certificate", "type":"default"},
 56 | 
 57 |     {"name":".dev", "description":"development", "type":"default"},
 58 |     {"name":".phps", "description":"development", "type":"default"}
 59 |   ],
 60 | 
 61 |   "fileprefix":[
 62 |     {"name":"~", "description":"backup", "type":"default"},
 63 |     {"name":".", "description":"backup", "type":"default"},
 64 | 
 65 |     {"name":"Old_", "description":"old", "type":"default"},
 66 |     {"name":"old_", "description":"old", "type":"default"},
 67 | 
 68 |     {"name":"Copy%20of%20", "description":"copy", "type":"default"}
 69 |   ],
 70 | 
 71 |   "filesuffix":[
 72 |     {"name":"~", "description":"backup", "type":"default"},
 73 | 
 74 |     {"name":"_Old", "description":"old", "type":"default"},
 75 |     {"name":"_old", "description":"old", "type":"default"},
 76 |     {"name":"%20-%20Copy", "description":"copy", "type":"default"},
 77 |     {"name":"(1)", "description":"copy", "type":"default"},
 78 |     {"name":"(2)", "description":"copy", "type":"default"},
 79 |     {"name":"(3)", "description":"copy", "type":"default"},
 80 |     {"name":"(4)", "description":"copy", "type":"default"},
 81 |     {"name":"(copy%201)", "description":"copy", "type":"default"},
 82 |     {"name":"(copy%202)", "description":"copy", "type":"default"},
 83 |     {"name":"(copy%203)", "description":"copy", "type":"default"},
 84 |     {"name":"(copy%204)", "description":"copy", "type":"default"},
 85 |     {"name":"(copy%201)(copy%201)", "description":"copy", "type":"default"},
 86 |     {"name":"%20-%20Copy", "description":"copy", "type":"default"},
 87 |     {"name":"%20copy", "description":"copy", "type":"default"},
 88 |     {"name":"%20(1)", "description":"copy", "type":"default"}
 89 |   ],
 90 | 
 91 |   "files":[
 92 |     {"name":"web.config", "description":"config", "type":"default"},
 93 |     {"name":"wp-config.php", "description":"config", "type":"default"},
 94 |     {"name":"configuration.php", "description":"config", "type":"default"},
 95 |     {"name":"LocalSettings.php", "description":"config", "type":"default"},
 96 |     {"name":"mt-config.cgi", "description":"config", "type":"default"},
 97 |     {"name":"settings.php", "description":"config", "type":"default"},
 98 |     {"name":"setting.php", "description":"config", "type":"default"},
 99 |     {"name":"setting.ini", "description":"config", "type":"default"},
100 |     {"name":"settings.ini", "description":"config", "type":"default"},
101 |     {"name":"config.ini", "description":"config", "type":"default"},
102 |     {"name":"configuration.ini", "description":"config", "type":"default"},
103 |     {"name":"configurations.ini", "description":"config", "type":"default"},
104 |     {"name":".config", "description":"config", "type":"default"},
105 |     {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"},
106 |     {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"},
107 |     {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"},
108 |     {"name":"awstats.conf", "description":"config", "type":"default"},
109 | 
110 |     {"name":".bash_history", "description":"user", "type":"default"},
111 |     {"name":".cache", "description":"user", "type":"default"},
112 |     {"name":".bashrc", "description":"user", "type":"default"},
113 |     {"name":".bash_profile", "description":"user", "type":"default"},
114 |     {"name":".mysql_history", "description":"user", "type":"default"},
115 |     {"name":".sh_history", "description":"user", "type":"default"},
116 |     {"name":".profile", "description":"user", "type":"default"},
117 |     {"name":".rhosts", "description":"user", "type":"default"},
118 |     {"name":".perf", "description":"user", "type":"default"},
119 |     {"name":".profile", "description":"user", "type":"default"},
120 |     {"name":".viminfo", "description":"user", "type":"default"},
121 |     {"name":".vimrc", "description":"user", "type":"default"},
122 |     {"name":".nano_history", "description":"user", "type":"default"},
123 | 
124 |     {"name":".bzr/README", "description":"repository", "type":"bzr"},
125 |     {"name":".bzr/branch-format", "description":"repository", "type":"bzr"},
126 |     {"name":".bzr/branch/branch.conf", "description":"repository", "type":"bzr"},
127 |     {"name":".bzr/branch/format", "description":"repository", "type":"bzr"},
128 |     {"name":".bzr/branch/last-revision", "description":"repository", "type":"bzr"},
129 |     {"name":".bzr/branch/tags", "description":"repository", "type":"bzr"},
130 |     {"name":".bzr/checkout/conflicts", "description":"repository", "type":"bzr"},
131 |     {"name":".bzr/checkout/dirstate", "description":"repository", "type":"bzr"},
132 |     {"name":".bzr/checkout/format", "description":"repository", "type":"bzr"},
133 |     {"name":".bzr/checkout/merge-hashes", "description":"repository", "type":"bzr"},
134 |     {"name":".bzr/checkout/views", "description":"repository", "type":"bzr"},
135 |     {"name":".bzr/repository/format", "description":"repository", "type":"bzr"},
136 |     {"name":".bzr/repository/pack-names", "description":"repository", "type":"bzr"},
137 |     {"name":".git/HEAD", "description":"repository", "type":"git"},
138 |     {"name":".git/index", "description":"repository", "type":"git"},
139 |     {"name":".git/config", "description":"repository", "type":"git"},
140 |     {"name":".git/description", "description":"repository", "type":"git"},
141 |     {"name":".git/COMMIT_EDITMSG", "description":"repository", "type":"git"},
142 |     {"name":".svn/entries", "description":"repository", "type":"svn"},
143 |     {"name":".hg/store/00manifest.i", "description":"repository", "type":"hg"},
144 |     {"name":".hg/store/00manifest.d", "description":"repository", "type":"hg"},
145 |     {"name":".hg/store/00changelog.i", "description":"repository", "type":"hg"},
146 |     {"name":".hg/store/00changelog.d", "description":"repository", "type":"hg"},
147 |     {"name":".hg/dirstate", "description":"repository", "type":"hg"},
148 |     {"name":".hg/requires", "description":"repository", "type":"hg"},
149 |     {"name":"CVS/Repository", "description":"repository", "type":"CVS"},
150 |     {"name":"CVS/Entries", "description":"repository", "type":"CVS"},
151 |     {"name":"CVS/Root", "description":"repository", "type":"CVS"},
152 |     {"name":"CVS/fileattr.xml", "description":"repository", "type":"CVS"},
153 | 
154 |     {"name":".listings", "description":"filelist", "type":"default"},
155 |     {"name":".listing", "description":"filelist", "type":"default"},
156 |     {"name":"thumbs.db", "description":"filelist", "type":"default"},
157 | 
158 |     {"name":"access.log", "description":"log", "type":"log"},
159 |     {"name":"error.log", "description":"log", "type":"log"},
160 |     {"name":"development.log", "description":"log", "type":"log"},
161 |     {"name":"dev.log", "description":"log", "type":"log"},
162 |     {"name":"production.log", "description":"log", "type":"log"},
163 |     {"name":"prod.log", "description":"log", "type":"log"},
164 | 
165 |     {"name":".htpasswd", "description":"access", "type":"default"},
166 |     {"name":".htaccess", "description":"access", "type":"default"},
167 |     {"name":"id_rsa", "description":"access", "type":"default"},
168 |     {"name":"id_dsa", "description":"access", "type":"default"},
169 | 
170 |     {"name":"dump.zip", "description":"dump", "type":"default"},
171 |     {"name":"db.zip", "description":"dump", "type":"default"},
172 |     {"name":"database.zip", "description":"dump", "type":"default"},
173 |     {"name":"database.tar", "description":"dump", "type":"default"},
174 |     {"name":"database.tar.gz", "description":"dump", "type":"default"},
175 |     {"name":"database.rar", "description":"dump", "type":"default"},
176 |     {"name":"dump.zip", "description":"dump", "type":"default"},
177 |     {"name":"db_full.zip", "description":"dump", "type":"default"},
178 |     {"name":"db_full.tar", "description":"dump", "type":"default"},
179 |     {"name":"db_full.tar.gz", "description":"dump", "type":"default"},
180 |     {"name":"db_full.rar", "description":"dump", "type":"default"},
181 | 
182 |     {"name":"test.php", "description":"random", "type":"default"},
183 |     {"name":"test.txt", "description":"random", "type":"default"},
184 |     {"name":"test.asp", "description":"random", "type":"default"},
185 |     {"name":"test.aspx", "description":"random", "type":"default"},
186 |     {"name":"test.asmx", "description":"random", "type":"default"},
187 | 
188 |     {"name":"backup", "description":"backup", "type":"backup"},
189 |     {"name":"bak", "description":"backup", "type":"backup"},
190 |     {"name":"pass.db", "description":"passwords", "type":"passwords"},
191 |     {"name":"password", "description":"passwords", "type":"passwords"},
192 |     {"name":"passwd", "description":"passwords", "type":"passwords"},
193 |     {"name":".passwd", "description":"passwords", "type":"passwords"},
194 |     {"name":"pwd", "description":"passwords", "type":"passwords"},
195 |     {"name":".pwd", "description":"passwords", "type":"passwords"},
196 |     {"name":"password", "description":"passwords", "type":"passwords"},
197 |     {"name":"passwords", "description":"passwords", "type":"passwords"},
198 | 
199 |     {"name":"xmlrpc.php", "description":"development", "type":"default"},
200 |     {"name":"info.php", "description":"development", "type":"default"},
201 |     {"name":"phpinfo.php", "description":"development", "type":"default"},
202 |     {"name":"infophp.php", "description":"development", "type":"default"},
203 |     {"name":"infos.php", "description":"development", "type":"default"},
204 |     {"name":"php.php", "description":"development", "type":"default"}
205 |   ],
206 | 
207 |   "directories":[
208 |     {"name":".hg", "description":"repository", "type":"hg"},
209 |     {"name":".git", "description":"repository", "type":"git"},
210 |     {"name":".svn", "description":"repository", "type":"svn"},
211 |     {"name":".bzr", "description":"repository", "type":"bzr"},
212 |     {"name":"CVS", "description":"repository", "type":"CVS"},
213 |     {"name":"CVSROOT", "description":"repository", "type":"CVS"},
214 |     {"name":"cvsroot", "description":"repository", "type":"CVS"},
215 | 
216 |     {"name":"conf", "description":"config", "type":"default"},
217 |     {"name":"config", "description":"config", "type":"default"},
218 |     {"name":"configuration", "description":"config", "type":"default"},
219 |     {"name":"configurations", "description":"config", "type":"default"},
220 |     {"name":"setting", "description":"config", "type":"default"},
221 |     {"name":"settings", "description":"config", "type":"default"},
222 | 
223 |     {"name":"manage", "description":"admin", "type":"default"},
224 |     {"name":"management", "description":"admin", "type":"default"},
225 |     {"name":"admin", "description":"admin", "type":"default"},
226 |     {"name":"administration", "description":"admin", "type":"default"},
227 | 
228 |     {"name":"backup", "description":"backup", "type":"backup"},
229 |     {"name":"bac", "description":"backup", "type":"backup"},
230 |     {"name":"back", "description":"backup", "type":"backup"},
231 |     {"name":"bak", "description":"backup", "type":"backup"},
232 |     {"name":"export", "description":"backup", "type":"backup"},
233 |     {
234 |       "name":"save", "description":"backup", "type":"backup"},
235 |     {"name":"saved", "description":"backup", "type":"backup"},
236 | 
237 |     {"name":"log", "description":"admin", "type":"log"},
238 |     {"name":"logs", "description":"admin", "type":"log"},
239 |     {"name":"logging", "description":"admin", "type":"log"},
240 | 
241 |     {"name":"pass", "description":"passwords", "type":"passwords"},
242 |     {"name":"password", "description":"passwords", "type":"passwords"},
243 |     {"name":"passwd", "description":"passwords", "type":"passwords"},
244 |     {"name":"pwd", "description":"passwords", "type":"passwords"},
245 |     {"name":"passwords", "description":"passwords", "type":"passwords"},
246 | 
247 | 
248 |     {"name":"confidential", "description":"privacy", "type":"default"},
249 |     {"name":"confidentiel", "description":"privacy", "type":"default"},
250 | 
251 |     {"name":".ssh", "description":"access", "type":"default"},
252 | 
253 |     {"name":"httpd", "description":"webserver", "type":"default"}
254 |   ]
255 | 
256 | }


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bs4==0.0.1
2 | nltk==3.1
3 | requests==2.10.0
4 | textblob==0.11.0
5 | tld==0.7.6
6 | 


--------------------------------------------------------------------------------