├── .gitignore ├── BappDescription.html ├── BappManifest.bmf ├── BurpSmartBuster.py ├── DemoLabs - BurpSmartBuster - DEF CON 2016.pdf ├── DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf ├── LICENSE.md ├── README.md ├── bsb.ini ├── data.json └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.pyc 3 | -------------------------------------------------------------------------------- /BappDescription.html: -------------------------------------------------------------------------------- 1 |

This is a Burp Suite extension which discover content with a smart touch. A 2 | bit like "DirBuster" and "Burp Discover Content", but smarter and being 3 | integrated into Burp Suite this plugin looks at words in pages, the domain name, 4 | the current directories and filename to help you find hidden files, directories 5 | and information you usually don't with a static dictionary file that brute 6 | forces its way on the web server.

7 | 8 |

For more information, please refer to 9 | 10 | https://github.com/pathetiq/BurpSmartBuster

11 | -------------------------------------------------------------------------------- /BappManifest.bmf: -------------------------------------------------------------------------------- 1 | Uuid: 7044ef35fa5a49b39285e101a79bf4ae 2 | ExtensionType: 2 3 | Name: BurpSmartBuster 4 | RepoName: burp-smart-buster 5 | ScreenVersion: 0.2 6 | SerialVersion: 3 7 | MinPlatformVersion: 2 8 | ProOnly: False 9 | Author: Patrick Mathieu @pathetiq 10 | ShortDescription: Looks for files, directories and file extensions based on current requests received by Burp Suite 11 | EntryPoint: BurpSmartBuster.py 12 | -------------------------------------------------------------------------------- /BurpSmartBuster.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | Created on 2015-02-22 4 | 5 | BurpSmartBuster 6 | @author: @pathetiq 7 | @thanks: Abhineet & @theguly 8 | @version: 0.3 9 | @summary: This is a Burp Suite extension which discover content with a smart touch. A bit like “DirBuster” and “Burp Discover Content”, 10 | but smarter and being integrated into Burp Suite this plugin looks at words in pages, the domain name, the current directories and filename 11 | to help you find hidden files, directories and information you usually don't with a static dictionary file that brute force its way on the web server. 12 | 13 | @bug: URL with variable, no file, no extension or weird variable separate by ; :, etc. breaks the directories/files listing 14 | @todo: technology detection and scanning, community files, add 404 detection in output, threads speeds and adjustments 15 | @todo: Add results to an issue. add tested files somewhere, add found file to sitemap. 16 | 17 | ''' 18 | import os 19 | os.environ["NLTK_DATA"] = os.path.join(os.getcwd(), "nltk_data") 20 | 21 | #sys imports 22 | import sys 23 | 24 | #Find the jython path where our prerequisites packages are installed 25 | import site 26 | for site in site.getsitepackages(): 27 | sys.path.append(site) 28 | #Examples of paths if needed 29 | #sys.path.append("/home/USERNAME/.local/lib/python2.7/site-packages/") 30 | #sys.path.append("/usr/local/lib/python2.7/site-packages") 31 | ##sys.path.append("/usr/lib/python2.7/dist-packages/") 32 | #sys.path.append("/home/USERNAME/Documents/Apps/TextBlob") 33 | #sys.path.append("/home/USERNAME/Documents/Apps/nltk") 34 | 35 | #burp imports 36 | from burp import IBurpExtender 37 | from burp import IScanIssue 38 | from burp import IScannerCheck 39 | from burp import IScannerInsertionPoint 40 | from burp import IHttpListener 41 | from burp import IBurpExtenderCallbacks 42 | 43 | #UI Import 44 | from burp import IContextMenuFactory 45 | from java.util import List, ArrayList 46 | from burp import ITab 47 | from javax.swing import JPanel, JLabel, JMenuItem, JTextField, JList, DefaultListModel, JButton, JFileChooser 48 | from javax.swing import JScrollPane, ListSelectionModel, GroupLayout, ButtonGroup, JRadioButton 49 | from java.awt import Dimension 50 | from java.awt import Toolkit 51 | from java.awt.datatransfer import StringSelection 52 | 53 | #utils imports 54 | from array import array 55 | from java.io import PrintWriter 56 | from java.net import URL 57 | import os 58 | import ConfigParser 59 | import json 60 | import logging 61 | from tld import get_tld 62 | import hashlib 63 | import random 64 | 65 | #spidering 66 | from bs4 import BeautifulSoup 67 | import Queue 68 | 69 | #Parse HTML comments 70 | from bs4 import Comment 71 | import re 72 | from urlparse import urlparse 73 | 74 | #requester 75 | import requests 76 | import csv 77 | from collections import deque 78 | import threading 79 | 80 | #text tokenization & natural language lib 81 | locals() 82 | #TODO: REVALIDATE the following : file /usr/local/lib/python2.7/dist-packages/nltk/internals.py line 902 has been change to remove os.getgroups() to compile in Burp...Jhython? 83 | #http://textminingonline.com/getting-started-with-textblob 84 | from textblob import TextBlob 85 | 86 | 87 | 88 | 89 | '''---------------------------------------------------------------------------------------------------------------------------------------- 90 | BurpSmartBuster Logging object and config 91 | ----------------------------------------------------------------------------------------------------------------------------------------''' 92 | class Logger(): 93 | 94 | LOG_FILENAME = 'BSB.log' 95 | DEFAULT_LEVEL = logging.DEBUG 96 | 97 | def __init__(self,name=LOG_FILENAME,level=DEFAULT_LEVEL): 98 | 99 | #define configs 100 | self._default_level=level 101 | self._name = name 102 | print "Log file is: " + name 103 | 104 | logging.basicConfig(filename=self._name+".log", 105 | level=self._default_level, 106 | format="%(asctime)s - [%(levelname)s] [%(threadName)s] (%(funcName)s:%(lineno)d) %(message)s", 107 | ) 108 | 109 | self._logger = logging.getLogger(name) 110 | return 111 | 112 | def getLogger(self): 113 | return self._logger 114 | 115 | 116 | '''---------------------------------------------------------------------------------------------------------------------------------------- 117 | BurpSmartBuster main class (BurpExtender) 118 | ----------------------------------------------------------------------------------------------------------------------------------------''' 119 | class BurpExtender(IBurpExtender, IScanIssue, IScannerCheck, IScannerInsertionPoint,IHttpListener, IBurpExtenderCallbacks, IContextMenuFactory, ITab): 120 | 121 | # definitions 122 | EXTENSION_NAME = "BurpSmartBuster" 123 | AUTHOR = "@pathetiq" 124 | 125 | def registerExtenderCallbacks(self, callbacks): 126 | # keep a reference to our callbacks object 127 | self._callbacks = callbacks 128 | 129 | # obtain an extension helpers object 130 | self._helpers = callbacks.getHelpers() 131 | 132 | # define stdout writer 133 | self._stdout = PrintWriter(callbacks.getStdout(), True) 134 | 135 | print(self.EXTENSION_NAME + ' by ' + self.AUTHOR) 136 | print('================================') 137 | print('This extension will create new requests for ALL "in scope" HTTP request made through Burp. Make sure to filter scope items') 138 | print('For help or any information see the github page or contact the author on twitter.') 139 | print('Note: The Spider currently only supports English, see author github page for new language installation instructions') 140 | 141 | # set our extension name 142 | callbacks.setExtensionName(self.EXTENSION_NAME) 143 | callbacks.registerScannerCheck(self) 144 | callbacks.registerHttpListener(self) 145 | callbacks.registerContextMenuFactory(self) 146 | 147 | #Initialize tab details 148 | 149 | #fields of options setBounds(x,y,width,heigth) 150 | self.verboseLabel = JLabel("Verbose") 151 | self.verboseLabel.setBounds(10,10,130,30) 152 | 153 | self.yesVerboseButton = JRadioButton("Yes") 154 | self.yesVerboseButton.setSelected(True) 155 | self.yesVerboseButton.setBounds(10,40,50,30) 156 | self.noVerboseButton = JRadioButton("No") 157 | self.noVerboseButton.setBounds(70,40,50,30) 158 | 159 | self.buttonGroup = ButtonGroup() 160 | self.buttonGroup.add(self.yesVerboseButton) 161 | self.buttonGroup.add(self.noVerboseButton) 162 | 163 | self.spiderPagesLabel = JLabel("Spider: Nbr of pages") 164 | self.spiderPagesLabel.setBounds(10,70,200,30) 165 | self.spiderPagesTextField = JTextField(300) 166 | self.spiderPagesTextField.setText("5") 167 | self.spiderPagesTextField.setBounds(10,100,300,30) 168 | self.spiderPagesTextField.setPreferredSize( Dimension( 250, 20 ) ) 169 | 170 | self.spiderRecPagesLabel = JLabel("Recursive: Nbr of pages") 171 | self.spiderRecPagesLabel.setBounds(10,130,250,30) 172 | self.spiderRecPagesTextField = JTextField(300) 173 | self.spiderRecPagesTextField.setText("3") 174 | self.spiderRecPagesTextField.setBounds(10,160,300,30) 175 | self.spiderRecPagesTextField.setPreferredSize( Dimension( 250, 20 ) ) 176 | 177 | self.fileTypeLabel = JLabel("Ignore Filetypes") 178 | self.fileTypeLabel.setBounds(10,190,130,30) 179 | self.fileTypeTextField = JTextField(300) 180 | self.fileTypeTextField.setText("gif,jpg,png,css,js,ico,woff") 181 | self.fileTypeTextField.setBounds(10,220,300,30) 182 | self.fileTypeTextField.setPreferredSize( Dimension( 250, 20 ) ) 183 | 184 | self.inScopeLabel = JLabel("Scan in-scope URLs only?") 185 | self.inScopeLabel.setBounds(10,250,200 ,30) 186 | 187 | self.yesInScopeButton = JRadioButton("Yes") 188 | self.yesInScopeButton.setBounds(10,280,50,30) 189 | self.yesInScopeButton.setSelected(True) 190 | self.noInScopeButton = JRadioButton("No") 191 | self.noInScopeButton.setBounds(70,280,50,30) 192 | 193 | self.buttonGroup1 = ButtonGroup() 194 | self.buttonGroup1.add(self.yesInScopeButton) 195 | self.buttonGroup1.add(self.noInScopeButton) 196 | 197 | self.refreshConfigButton = JButton("Update Configuration", actionPerformed=self.updateConfig) 198 | self.refreshConfigButton.setBounds(10,310,200,30) 199 | 200 | #Jlist to contain the results 201 | self.list = JList([]) 202 | self.list.setSelectionMode(ListSelectionModel.MULTIPLE_INTERVAL_SELECTION) 203 | self.list.setLayoutOrientation(JList.VERTICAL) 204 | self.list.setVisibleRowCount(-1) 205 | self.listScroller = JScrollPane(self.list,JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED) 206 | self.listScroller.setBounds(510,40,500,500) 207 | #self.listScroller.setPreferredSize(Dimension(400, 500)) 208 | 209 | self.urlFoundLabel = JLabel("URLs Found") 210 | self.urlFoundLabel.setBounds(510,10,130,30) 211 | self.listScroller.setPreferredSize(Dimension(500, 100)) 212 | self.listScroller.setViewportView(self.list) 213 | 214 | self.clearListButton = JButton("Clear list", actionPerformed=self.clearList) 215 | self.clearListButton.setBounds(350,40,150,30) 216 | 217 | self.copyListButton = JButton("Copy Selected", actionPerformed=self.copyList) 218 | self.copyListButton.setBounds(350,70,150,30) 219 | 220 | self.deleteListButton = JButton("Delete Selected", actionPerformed=self.deleteSelected) 221 | self.deleteListButton.setBounds(350,100,150,30) 222 | 223 | self.exportListButton = JButton("Export list", actionPerformed=self.exportList) 224 | self.exportListButton.setBounds(350,130,150,30) 225 | 226 | 227 | #main panel 228 | self.mainpanel = JPanel() 229 | self.mainpanel.setLayout(None) 230 | 231 | self.mainpanel.add(self.verboseLabel) 232 | self.mainpanel.add(self.yesVerboseButton) 233 | self.mainpanel.add(self.noVerboseButton) 234 | self.mainpanel.add(self.spiderPagesLabel) 235 | self.mainpanel.add(self.spiderPagesTextField) 236 | self.mainpanel.add(self.spiderRecPagesLabel) 237 | self.mainpanel.add(self.spiderRecPagesTextField) 238 | self.mainpanel.add(self.fileTypeLabel) 239 | self.mainpanel.add(self.fileTypeTextField) 240 | self.mainpanel.add(self.inScopeLabel) 241 | self.mainpanel.add(self.yesInScopeButton) 242 | self.mainpanel.add(self.noInScopeButton) 243 | self.mainpanel.add(self.refreshConfigButton) 244 | self.mainpanel.add(self.urlFoundLabel) 245 | self.mainpanel.add(self.listScroller) 246 | self.mainpanel.add(self.clearListButton) 247 | self.mainpanel.add(self.copyListButton) 248 | self.mainpanel.add(self.deleteListButton) 249 | self.mainpanel.add(self.exportListButton) 250 | 251 | callbacks.customizeUiComponent(self.mainpanel) 252 | callbacks.addSuiteTab(self) 253 | 254 | #set default config file name and values 255 | 256 | #only smart is use, keeping other for future development 257 | self._configSmart_Local = False 258 | self._configSmart_Smart = True 259 | self._configSmart_File = False 260 | self._configSmart_Spider = False 261 | self._trailingSlash = True 262 | 263 | #To be fetch from the UI settings 264 | self._configSpider_NumberOfPages = 5 265 | self._verbose = False 266 | self._ignoreFileType = ["gif","jpg","png","css","js","ico","woff"] 267 | #keeping to use it 268 | self._configInScope_only = True 269 | self._configSpider_NumberOfPages = 5 270 | 271 | #Get a logger object for logging into file 272 | loggerTemp = Logger(self.EXTENSION_NAME,logging.DEBUG) 273 | self._logger= loggerTemp.getLogger() 274 | 275 | #get the config file, will overwrite default config if the ini file is different 276 | #self.getSmartConfiguration() 277 | 278 | #get config from the UI 279 | self.updateConfig("") 280 | 281 | #words gather on the page from the spidering 282 | self._words = {} 283 | self._mergedWords = {} 284 | 285 | #robots.txt list 286 | self._robots = {} 287 | self._robotsScanned = {} 288 | 289 | #sitemap.xml list 290 | self._sitemap = {} 291 | 292 | #url in comments 293 | self._urlsInComment = {} 294 | 295 | #domain names to query current url/path/files for hidden items 296 | self._smartDomain = {} 297 | 298 | #sitemap and robots scanned once 299 | self._siteRobotScanned = {} 300 | 301 | #Load our BSB json data 302 | self._jsonFile = "data.json" 303 | jsonfile = open(self._jsonFile) 304 | self._parsed_json = json.load(jsonfile) 305 | jsonfile.close() 306 | 307 | #define the request object to use each time we need to call a URL 308 | self._requestor = Requestor(self._logger,self) 309 | 310 | #Variable to define if unique data has already been grabbed 311 | self._smartRequestData = {} 312 | self._smartRequestPath = {} 313 | self._smartRequestFiles = {} 314 | #number of time the spider have run 315 | self._spiderRan = {} #Array of domain. If domain exist. Spider did ran! 316 | 317 | return 318 | 319 | ''' 320 | Graphic Functions 321 | ''' 322 | def createMenuItems(self, contextMenuInvocation): 323 | self._contextMenuData = contextMenuInvocation.getSelectedMessages() 324 | menu_list = ArrayList() 325 | menu_list.add(JMenuItem("Send to BurpSmartBuster",actionPerformed=self.menuItemClicked)) 326 | return menu_list 327 | 328 | def menuItemClicked(self, event): 329 | data = self.getURLdata(self._contextMenuData[0],True) 330 | self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString()) 331 | self._logger.debug("Executing: smartRequest() from menuItemClicked") 332 | thread = threading.Thread( 333 | target=self.smartRequest, 334 | name="Thread-smartRequest", 335 | args=[data],) 336 | thread.start() 337 | 338 | # Implement ITab 339 | def getTabCaption(self): 340 | return self.EXTENSION_NAME 341 | 342 | # Return our panel and button we setup. Components of our extension's tab 343 | def getUiComponent(self): 344 | return self.mainpanel 345 | 346 | '''------------------------------------------------ 347 | Extension Unloaded 348 | ------------------------------------------------''' 349 | def extensionUnloaded(self): 350 | self._logger.info("Extension was unloaded") 351 | return 352 | 353 | '''------------------------------------------------ 354 | VERBOSE FUNCTION 355 | 356 | Display each tested URL 357 | ------------------------------------------------''' 358 | def verbose(self,text): 359 | #Is verbose on or off from config file? 360 | if self._verbose == True: 361 | print "[VERBOSE]: "+text 362 | return 363 | 364 | '''------------------------------------------------ 365 | GRAPHICAL FUNCTIONS for BUTTONS 366 | ------------------------------------------------''' 367 | 368 | def getRecursiveConfig(self): 369 | return int(self.spiderRecPagesTextField.getText()) 370 | 371 | #refresh the config from the UI 372 | def updateConfig(self,meh): 373 | self._configSpider_NumberOfPages = int(self.spiderPagesTextField.getText()) 374 | 375 | if self.yesVerboseButton.isSelected(): 376 | self._verbose = True 377 | else: 378 | self._verbose = False 379 | 380 | if self.yesInScopeButton.isSelected(): 381 | self._configInScope_only = True 382 | else: 383 | self._configInScope_only = False 384 | 385 | fileType = [] 386 | fileTypeStr = self.fileTypeTextField.getText() 387 | self._ignoreFileType = self.fileTypeTextField.getText().split(",") 388 | 389 | self._logger.info("Config changed: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType)) 390 | print "Now using config: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType) 391 | 392 | return 393 | 394 | #add a URL to the list 395 | def addURL(self,url): 396 | list = self.getListData() 397 | list.append(url) 398 | 399 | self.list.setListData(list) 400 | return 401 | 402 | #return the who list 403 | def getListData(self): 404 | list = [] 405 | 406 | for i in range(0, self.list.getModel().getSize()): 407 | list.append(self.list.getModel().getElementAt(i)) 408 | 409 | return list 410 | 411 | #Clear the list 412 | def clearList(self,meh): 413 | self.list.setListData([]) 414 | return 415 | 416 | #Copy to clipboard 417 | def copyList(self,meh): 418 | clipboard = Toolkit.getDefaultToolkit().getSystemClipboard() 419 | list = self.getListData() 420 | selected = self.list.getSelectedIndices().tolist() 421 | 422 | copied = "" 423 | urls = "" 424 | for i in selected: 425 | url = str(list[i]).split(',')[0] 426 | urls = urls+str(url)+"\n" 427 | 428 | clipboard.setContents(StringSelection(urls), None) 429 | 430 | return 431 | 432 | #Delete selected item from the list 433 | def deleteSelected(self,meh): 434 | x = self.list.getSelectedIndices().tolist() 435 | list = self.getListData() 436 | 437 | for i in reversed(x): 438 | del list[i] 439 | 440 | self.list.setListData(list) 441 | return 442 | 443 | #TODO: save as the list 444 | def exportList(self,meh): 445 | fd = JFileChooser() 446 | dialog = fd.showDialog(self.mainpanel, "Save List As") 447 | 448 | dataList = self.getListData() 449 | 450 | urls = "" 451 | 452 | if dialog == JFileChooser.APPROVE_OPTION: 453 | file = fd.getSelectedFile() 454 | path = file.getCanonicalPath() 455 | 456 | try: 457 | with open(path, 'w') as exportFile: 458 | for item in dataList: 459 | url = str(item).split(',')[0] 460 | exportFile.write(url+"\n") 461 | except IOError as e: 462 | print "Error exporting list: " + str(e) 463 | self._logger.debug("Error exporting list to: " + path + ", Error: " + str(e)) 464 | 465 | return 466 | 467 | '''------------------------------------------------------------------------------------------------ 468 | MAIN FUNCTION / WHERE EVERYTHING STARTS 469 | 470 | For every request which isn't created from the Extender(this might have to be change) 471 | The request is analyse and related to the config options new request are create to test if 472 | specific files/paths/directories exists. 473 | ------------------------------------------------------------------------------------------------''' 474 | def processHttpMessage(self, toolFlag, messageIsRequest, messageInfo): #IHttpRequestResponse message info 475 | 476 | 477 | #TODO: not from repeater and intruder --> set in ini file too! --> and toolFlag != self._callbacks.TOOL_EXTENDER 478 | 479 | #This is required to not LOOP Forever as our plugin generate requests! 480 | if toolFlag == self._callbacks.TOOL_PROXY and toolFlag != self._callbacks.TOOL_EXTENDER and toolFlag != self._callbacks.TOOL_SCANNER: 481 | 482 | #Get an Urldata object to use later 483 | data = self.getURLdata(messageInfo,messageIsRequest) 484 | 485 | #VERIFICATION: if URL is in scope we do scan 486 | if not self._callbacks.isInScope(data.getUrl()): 487 | #self._callbacks.includeInScope(url) 488 | self._logger.info("URL not in scope: " + data.getUrl().toString()) 489 | return 490 | 491 | if messageIsRequest: 492 | self._logger.debug("Entering: processHttpMessage() REQUEST") 493 | self._logger.debug("Request from domain: "+data.getDomain()) 494 | 495 | #REJECT specific extension on request 496 | if data.getFileExt() in self._ignoreFileType: 497 | self._logger.info("FILETYPE IGNORED: " + data.getUrl().toString()) 498 | return 499 | 500 | ############################################### 501 | # Decide which mode to use based on ini config 502 | ############################################### 503 | 504 | #from browsed file only 505 | if self._configSmart_Smart: 506 | self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString()) 507 | self._logger.debug("Executing: smartRequest()") 508 | thread = threading.Thread( 509 | target=self.smartRequest, 510 | name="Thread-smartRequest", 511 | args=[data], 512 | ) 513 | thread.start() 514 | thread.join() 515 | 516 | #wordlist adjust with the domain name 517 | elif self._configSmart_Local: 518 | self._logger.debug("Executing: localRequest()") 519 | self.localRequest(data) 520 | 521 | #your own wordlist, no smart here 522 | elif self._configSmart_File: 523 | self._logger.debug("Executing: fileRequest()") 524 | self.fileRequest(data) 525 | 526 | #spidered items only. Like smart but it browse for you. 527 | elif self._configSmart_Spider: 528 | self._logger.debug("Executing: spiderRequest()") 529 | self.spiderRequest(data) 530 | 531 | else: #if response 532 | self._logger.debug("Entering: processHttpMessage() RESPONSE") 533 | 534 | ############################################### 535 | # Decide which mode to use based on ini config 536 | ############################################### 537 | #VERIFICATION: if URL is in scope we do scan 538 | #if not self._callbacks.isInScope(data.getUrl()): 539 | # #self._callbacks.includeInScope(url) 540 | # self._logger.info("URL %s not in scope: " % data.getUrl()) 541 | # return 542 | 543 | #from browsed file only 544 | #TODO: sniff JS and CSS file for URLS 545 | #if self._configSmart_Smart: 546 | self._logger.debug("Executing: getUrlInComments()") 547 | thread = threading.Thread( 548 | target=self.getUrlInComments, 549 | name="Thread-getUrlInComments", 550 | args=[data], 551 | ) 552 | thread.start() 553 | thread.join() 554 | return 555 | 556 | '''---------------------------------------------------------------------------------------------------------- 557 | BurpSmartBuster main class (BurpExtender) 558 | Only spidering to gather the more page and test those 559 | ----------------------------------------------------------------------------------------------------------''' 560 | def spiderRequest(self, data): 561 | return 562 | 563 | '''---------------------------------------------------------------------------------------------------------- 564 | Use BSB files on all visited page 565 | ----------------------------------------------------------------------------------------------------------''' 566 | def localRequest(self, data): 567 | return 568 | 569 | '''---------------------------------------------------------------------------------------------------------- 570 | Use user supply file on all visited page 571 | ----------------------------------------------------------------------------------------------------------''' 572 | def fileRequest(self, data): 573 | return 574 | 575 | 576 | '''---------------------------------------------------------------------------------------------------------- 577 | Use the logic, based on the BSB files and data from the website 578 | This is where all the magic happens. 579 | 580 | We want to : 581 | - Call some file extension for the file we browsed to 582 | -TODO: Get a huge list 583 | - Extension 584 | - User file, windows, linux, osx 585 | - Call some path when browsing a new path (even when it is a file) 586 | - default path list 587 | - Call some files when browsing a new path 588 | - user files windows, osx, linux 589 | - backup list 590 | - autosave list 591 | - svn, git list 592 | - CMS 593 | - Web server, etc. 594 | - Get robots.txt and sitemap data 595 | - Brute force up to 2 or 3 letters of files names and path on all found path which is not cms/git/etc. 596 | 597 | - Future version: Parse HTML comments for path 598 | 599 | 600 | - If they exist, we add them to XXX? 601 | - If new path exists, let's go recursive (new class?) 602 | - If file exists: add to sitemap + verbose + log 603 | 604 | @param data: UrlData object containing all information about the URL 605 | ----------------------------------------------------------------------------------------------------------''' 606 | def smartRequest(self,data): 607 | 608 | #Current request variables 609 | domain = data.getDomain() 610 | url = data.getUrl() 611 | 612 | ##################### FETCH DATA ############################### 613 | # Gather smart data once before sending requests 614 | ################################################################ 615 | self._logger.debug("Has the Data been gathered for? : "+ str(url)) 616 | if domain not in self._smartRequestData: 617 | try: 618 | self._smartRequestData[domain] = True 619 | self._logger.debug("no") 620 | self._logger.info("Fetching data for: "+ domain) 621 | 622 | print "getting data for:" + str(url) 623 | self.getSmartData(data) 624 | 625 | except Exception as e: 626 | print "exception:"+ e 627 | self._smartRequestData[domain] = False 628 | return False 629 | else: 630 | self._logger.debug("yes") 631 | 632 | # Execution of request with the received data: 633 | # - spider 634 | # - sitemap 635 | # - robots 636 | # - current directories 637 | # - commentsInUrl 638 | # json data: 639 | # - extension files 640 | # - common basic cms files 641 | # - common server files 642 | # - common user files 643 | # - common test files 644 | # - common repositories files 645 | # - 646 | # - 647 | ''' 648 | For the current directories (path) 649 | - Test a path/file for a category of path/files 650 | - If a tested path/files exist (200/401/403/500) scan other files + - add to sitemap and LOG + add issues? 651 | - If not skip it 652 | - go 3 deep max and retest all 653 | 654 | TODO future version: 655 | Pseudo algo: 656 | Si le present url est un fichier: 657 | - Si c'Est un fichier php... tester phps extension. 658 | - si c'Est un fichier asmx, tester les wsdl 659 | 660 | Si c'Est un path: 661 | - si ca inclus un path dans sharepoint, tester les sharepoints 662 | - si ca inclus un fichier de wordpress ou drupal, tester quelques fichiers cms 663 | - Si on trouve un répertoire de type X, effectuer une recherche sur les fichiers de type X dans le repertoire trouvé 664 | ''' 665 | 666 | 667 | 668 | 669 | 670 | #Current request data 671 | baseUrl = data.getBaseUrl() 672 | path = data.getPath() 673 | filename = data.getFilename() 674 | extension = data.getFileExt() 675 | print "CURRENT FILE: " + baseUrl + "," + filename + "," + extension 676 | #data.json sections: extensions, fileprefix, filesuffix, files, directories 677 | 678 | #test local file 679 | #if current url is a file: test extentions + intelligent details 680 | #AND we test current file with prefix and suffix 681 | 682 | #testing directories 683 | #if current URL have some directories test them out 684 | #Test them with FILES and DIRECTORIES. Including the current directory (last in path) 685 | 686 | #with the smart data test robots path and files 687 | #test N url from sitemap 688 | #in current paths test files and path using domainname and domain without the tld 689 | #with filename generated + extensions and path/filenamegenerated 690 | ''' 691 | print "EXTENSIONS" 692 | for extension in self._parsed_json["extensions"]: 693 | print extension["name"] 694 | print "SUFFIX PREFIX" 695 | for prefix in self._parsed_json["fileprefix"]: 696 | print prefix["name"] 697 | for suffix in self._parsed_json["filesuffix"]: 698 | print suffix["name"] 699 | 700 | print "FILES" 701 | for files in self._parsed_json["files"]: 702 | print files["name"] 703 | ''' 704 | 705 | print "DIRECTORIES" 706 | 707 | #Directories data information 708 | directories = data.getDirectories() 709 | directory = "/" 710 | slash = "" #force slash or not var 711 | 712 | #get options foir trailing slash. By default it's ON 713 | if self._trailingSlash: 714 | slash = "/" 715 | 716 | ##################### EXECUTE DATA.json REQUESTS ################### 717 | # Build Request to be execute based on our data.json 718 | # and getSmartData results 719 | ################################################################ 720 | 721 | #TODO: important put tested directories and files in a dictionnary or array 722 | #TODO: important put tested directories and files in a dictionnary or array 723 | #TODO: important put tested directories and files in a dictionnary or array 724 | #TODO: important put tested directories and files in a dictionnary or arrayà 725 | 726 | 727 | ######################## 728 | # Technology scanner 729 | ######################## 730 | ''' 731 | - do a request to root dir 732 | - get response (check for redirect) 733 | - check headers 734 | - check file extensions 735 | - depending on results scan X files. 736 | - Set current domain technologyVar to X 737 | ''' 738 | 739 | ################ 740 | #Scan the root directory! 741 | ################ 742 | print "DIR: "+str(directories) 743 | 744 | if not directories: 745 | directories = ["/"] 746 | 747 | # response will be dealed in requestor 748 | for dir in directories: 749 | print "TESTING: " + dir 750 | if dir == "/": 751 | directory = "/" 752 | else: 753 | directory = directory+dir+"/" #test all directories: / /a/ /a/b/ /a/b/c/ ... 754 | 755 | #call our directories inside all request directires 756 | for dir2 in self._parsed_json["directories"]: 757 | self.verbose("RequestDir for: "+baseUrl+directory+dir2["name"]+slash) 758 | self._requestor.addRequest(baseUrl+directory+dir2["name"]+slash,data) 759 | 760 | # call directories based on domain information: url/a/b/c/smartDomain , url/a/b/smartDomain/, etc. 761 | #print "SMARTDOMAIN"+self._smartDomain 762 | for dir2 in self._smartDomain[domain]: 763 | self.verbose("RequestSmartDomain for: " + baseUrl + directory + dir2) 764 | self._requestor.addRequest(baseUrl + directory + dir2,data) 765 | 766 | #in each directory call smartDomain.extensions 767 | for ext in self._parsed_json["extensions"]: 768 | self.verbose("RequestSmartDomain.ext for: " + baseUrl + directory + dir2 + ext["name"]) 769 | self._requestor.addRequest(baseUrl + directory + dir2 + ext["name"],data) 770 | 771 | #call our files in all directories 772 | #print "parsed json"+self._parsed_json["files"] 773 | for files in self._parsed_json["files"]: 774 | self.verbose("RequestFile for: "+baseUrl+directory+files["name"]) 775 | self._requestor.addRequest(baseUrl+directory+files["name"],data) 776 | 777 | 778 | ################ 779 | #If URL is a file, let's try to add some extension to the file 780 | ################ 781 | if extension: 782 | 783 | #replace current file extension for our extension 784 | tempFilenameUrl = baseUrl+directory+filename 785 | tempFilenameUrl1 = baseUrl+directory+filename+"."+extension 786 | for ext in self._parsed_json["extensions"]: 787 | self.verbose("RequestExt for: "+ tempFilenameUrl+ext["name"]) 788 | self.verbose("RequestFileExt for: "+ tempFilenameUrl1+ext["name"]) 789 | self._requestor.addRequest(tempFilenameUrl+ext["name"],data) 790 | self._requestor.addRequest(tempFilenameUrl1+ext["name"],data) 791 | 792 | #add a prefix to current file 793 | tempFilenameUrl = baseUrl+directory 794 | for prefix in self._parsed_json["fileprefix"]: 795 | tempFilenameUrl1 = tempFilenameUrl+prefix["name"]+filename+"."+extension 796 | self.verbose("RequestPrefix for: "+tempFilenameUrl1) 797 | self._requestor.addRequest(tempFilenameUrl1,data) 798 | 799 | #add suffix to current file 800 | tempFilenameUrl = baseUrl+directory 801 | for suffix in self._parsed_json["filesuffix"]: 802 | tempFilenameUrl1 = tempFilenameUrl+filename+suffix["name"]+"."+extension 803 | self.verbose("RequestSuffix for: "+tempFilenameUrl1) 804 | self._requestor.addRequest(tempFilenameUrl1,data) 805 | 806 | 807 | 808 | #make sure we have some data 809 | #print "DATA RECEIVED" 810 | #print self._words[domain] 811 | #print self._mergedWords ##need to call the emrge function if needed 812 | #print self._robots[domain] 813 | #print str(len(self._sitemap[domain])) 814 | #print str(self._urlsInComment[domain]) 815 | 816 | 817 | 818 | ##################### EXECUTE SMART REQUESTS ################### 819 | # Build Request to be execute based on our data.json 820 | # and getSmartData results 821 | ################################################################ 822 | 823 | #list of smart directories 824 | smartDirectories = {} 825 | 826 | #list of smart files (add our extension to it) 827 | smartfiles = {} 828 | 829 | ################ 830 | #Request N pages from sitemap 831 | ################ 832 | if domain not in self._siteRobotScanned: #Do it once 833 | self._siteRobotScanned[domain] = True #done for this domain 834 | 835 | tmpSiteMap = [] 836 | for i in range(0,self._configSpider_NumberOfPages): #get N number of pages from ini config 837 | tmpSiteMap.append(self._sitemap[domain][i]) 838 | 839 | #Requests files and directories from robots.txt 840 | tmpRobots = [] 841 | for line in self._robots[domain]: 842 | 843 | #in case robots.txt use ending wildcard we remove it 844 | if line.endswith("*"): 845 | line = line[:-1] 846 | #TODO: Test if directory or file is not 404 ?? 847 | tmpRobots.append(baseUrl+line) 848 | 849 | ################ 850 | # requests all value for N sitemap url 851 | ################ 852 | for link in tmpSiteMap: 853 | 854 | if link.endswith("/"): #scan directories and files 855 | 856 | for dir2 in self._parsed_json["directories"]: 857 | self.verbose("RequestSiteMap dir/file for: " + link + dir2["name"] + slash) 858 | self._requestor.addRequest(link + dir2["name"] + slash,data) 859 | 860 | for files in self._parsed_json["files"]: 861 | self.verbose("RequestSiteMap dir/file for: " + link + files["name"]) 862 | self._requestor.addRequest(link + files["name"],data) 863 | 864 | else: #scan extensions and suffix/prefix 865 | # call our files in all directories 866 | for ext in self._parsed_json["extensions"]: 867 | self.verbose("RequestSitemap file/ext/ext for: " + link + ext["name"]) 868 | self._requestor.addRequest(link + ext["name"],data) 869 | 870 | #Get the file extension of the current sitemap url to replace the extension 871 | tmpUrl = urlparse(link) 872 | if len(tmpUrl.path.split(".")[-1:]) > 1: 873 | newUrl = ".".join(tmpUrl.path.split(".")[:-1])+ext["name"] 874 | self.verbose("RequestSiteMap file/ext for: " + newUrl) 875 | self._requestor.addRequest(newUrl,data) 876 | 877 | ################ 878 | #requests all values for robots path 879 | ################ 880 | for link in tmpRobots: 881 | tmpUrl = baseUrl + link 882 | if link.endswith("/"): # scan directories and files 883 | for dir2 in self._parsed_json["directories"]: 884 | self.verbose("RequestRobots dir/file for: " + tmpUrl + dir2["name"] + slash) 885 | self._requestor.addRequest(tmpUrl + dir2["name"] + slash,data) 886 | 887 | for files in self._parsed_json["files"]: 888 | self.verbose("RequestRobots dir/file for: " + tmpUrl + files["name"]) 889 | self._requestor.addRequest(tmpUrl + files["name"],data) 890 | else: 891 | for ext in self._parsed_json["extensions"]: 892 | self.verbose("RequestRobots file/ext/ext for: " + tmpUrl + ext["name"]) 893 | self._requestor.addRequest(tmpUrl + ext["name"],data) 894 | 895 | #Get the file extension of the current sitemap url to replace the extension 896 | tmpUrl1 = urlparse(link) 897 | if len(tmpUrl1.path.split(".")[-1:]) > 1: 898 | newUrl = ".".join(tmpUrl1.path.split(".")[:-1])+ext["name"] 899 | self.verbose("RequestRobots file/ext for: " + newUrl) 900 | self._requestor.addRequest(newUrl,data) 901 | 902 | 903 | #TODO : path and words/merge words 904 | 905 | ################ 906 | #Request from words 907 | ################ 908 | #print self._words 909 | 910 | 911 | #TODO: loop over: sitemap (done), robots (done), words/mergedwords(fixed for textblob required), bruteforce(later) Maybe comments data? 912 | # - add the data to our stack to request and parse by the Requestor object 913 | # - Get current query path and files & Filter out static object from the request (images,etc.) 914 | #filter out: gif,jpg,png,css,ico 915 | 916 | 917 | print "Done. Waiting for more URL...!" 918 | 919 | '''---------------------------------------------------------------------------------------------------------- 920 | Get the data for smartRequest(), it will fills our list of words which will be our smart logic data to create 921 | multiple new HTTP requests. This data should be gather once. 922 | ----------------------------------------------------------------------------------------------------------''' 923 | #TODO: split some of this works in different functions 924 | def getSmartData(self, data): 925 | 926 | ################################################################ 927 | # Get the url and its data to create the new smart requests 928 | ################################################################ 929 | urlString = str(data.getUrl()) #cast to cast to stop the TYPEerror on URL() 930 | domain = data.getDomain() 931 | netloc = data.getNetloc() 932 | directories = data.getDirectories() 933 | lastDirectory = data.getLastDirectory() 934 | params = data.getParams() 935 | fileExt = data.getFileExt() 936 | completeUrl = data.getCompleteURL() 937 | baseUrl = data.getBaseUrl() 938 | 939 | #Java URL to be used with Burp API 940 | url = URL(urlString) 941 | self._logger.debug("Current URLString: "+urlString) 942 | ######################### SPIDER EXECUTION ##################### 943 | # Get some words from the web page: do it once! 944 | # Note: This step could be threaded using Queue.Queue but there is 945 | # little advantage as we need to wait to get all the value anyway 946 | ################################################################ 947 | 948 | self._logger.debug("Has the Spider ran for? : "+ domain) 949 | if domain not in self._spiderRan: #doing it once 950 | self._spiderRan[domain] = True 951 | self._logger.debug("No") 952 | 953 | #self._mergedWords[domain] = {} 954 | #self._words[domain] = {} 955 | 956 | #Start URL, number of page to spider through, request class object to use 957 | 958 | spider = Spider(data, self._configSpider_NumberOfPages, self._requestor,self._logger) 959 | spider.runSpidering() 960 | 961 | #Get words from the spidering 962 | self._words[domain] = spider.getWords() 963 | #Get merged words 964 | #spider.mergeWords() 965 | #self._mergedWords[domain] = spider.getMergedWords() 966 | 967 | self._logger.debug("Length of Words: "+ str(len(self._words[domain]))) 968 | #self._logger.debug("Length of MergedWords: "+ str(len(self._mergedWords[domain]))) 969 | self._logger.info("SPIDER DONE") 970 | else: 971 | self._logger.debug("Yes") 972 | 973 | ################################################################ 974 | # Get robots.txt (once) 975 | # Retrieve unique path and files from the robots.txt 976 | ################################################################ 977 | if domain not in self._robots: #do it once 978 | print " robot " 979 | 980 | #get the file 981 | queueRobot = Queue.Queue(1) 982 | self._logger.info("robot") 983 | thread = threading.Thread( 984 | target=self._requestor.runRequest, 985 | name="Thread-Robots", 986 | args=[baseUrl+"/robots.txt", queueRobot], 987 | ) 988 | thread.start() 989 | thread.join() 990 | response = queueRobot.get() 991 | 992 | #Parse the file for disallow lines 993 | robotList = [] 994 | for item in response.content.split('\n'): 995 | if item: 996 | i = item.split(':') 997 | if i[0].lower() == "disallow" and i[1] not in robotList: 998 | robotList.append(i[1]) 999 | 1000 | #add to domain list 1001 | self._robots[domain] = robotList 1002 | 1003 | self._logger.debug("ROBOT LIST for : " + domain + ":") 1004 | for item in self._robots[domain]: 1005 | self._logger.debug(item) 1006 | 1007 | self._logger.info("ROBOTS DONE") 1008 | 1009 | else: 1010 | print "no robot" 1011 | self._logger.debug("Robots.txt already checked for: " + baseUrl) 1012 | 1013 | ################################################################ 1014 | # Get sitemap.xml (once) 1015 | # test those url for all files/extensions if not in local deque yet 1016 | ################################################################ 1017 | if domain not in self._sitemap: 1018 | print " sitemap " 1019 | queueSitemap = Queue.Queue(1) 1020 | thread = threading.Thread( 1021 | target=self._requestor.runRequest, 1022 | name="Thread-Sitemap", 1023 | args=[baseUrl+"/sitemap.xml", queueSitemap], 1024 | ) 1025 | thread.start() 1026 | thread.join() 1027 | 1028 | response = queueSitemap.get() 1029 | soup = BeautifulSoup(response.content, "html.parser") 1030 | 1031 | #Parse the XML TODO: for N instance related to .ini config 1032 | sitemapList = [] 1033 | for url in soup.findAll("loc"): 1034 | sitemapList.append(url.text) 1035 | 1036 | self._sitemap[domain] = sitemapList 1037 | 1038 | self._logger.debug("Sitemap.xml nbr of items: "+str(len(self._sitemap[domain]))) 1039 | 1040 | self._logger.info("SITEMAP DONE") 1041 | else: 1042 | print "no sitemap" 1043 | 1044 | ################################################################ 1045 | # Get domain name relative values 1046 | # test those names for directory, files with extension 1047 | ################################################################ 1048 | print "smartDomain" 1049 | tmpDomValue = [] 1050 | 1051 | if domain == "localhost": 1052 | tmpDomValue.append(domain) 1053 | else: 1054 | tld = get_tld(urlString, as_object=True) 1055 | tmpDomValue.append(tld.domain) 1056 | tmpDomValue.append(tld.tld) 1057 | 1058 | if tld.subdomain: 1059 | tmpDomValue.append("".join(tld.subdomain+"." + tld.tld)) 1060 | 1061 | 1062 | self._smartDomain[domain] = tmpDomValue 1063 | 1064 | ######################## BRUTE FORCE DATA ###################### 1065 | # 1, 2 or 3 letters brute force of current directory 1066 | # Has the current directory been test already? No: do it 1067 | #brute force function or object? 1068 | ################################################################ 1069 | #TODO: Later version 1070 | #charset = "abcdefghijklmnopqrstuvwxyz0123456789_-" 1071 | #for a in itertools.product(charset,repeat=2): 1072 | # sub="".join(a) 1073 | 1074 | 1075 | return True 1076 | 1077 | 1078 | '''---------------------------------------------------------------------------------------------------------- 1079 | Get the information inside response for smartRequest() 1080 | It will look for URL and email domain inside HTML comments 1081 | 1082 | @todo: Optimize the IFs in the comment for loop! 1083 | ----------------------------------------------------------------------------------------------------------''' 1084 | def getUrlInComments(self,data): 1085 | 1086 | ################### CURRENT DIRECTORIES/FILES ################## 1087 | # Get current directory(ies) 1088 | # validate if tested already 1089 | # If not deal with: test directories and files at currentPath 1090 | # New class object? 1091 | ################################################################ 1092 | responseData = data.getResponseData() 1093 | 1094 | #TODO: Parse HTML files for comments for Path and file 1095 | 1096 | #if you have a response 1097 | if responseData: 1098 | soup = BeautifulSoup(responseData, "html.parser") 1099 | comments=soup.find_all(string=lambda text:isinstance(text,Comment)) 1100 | regUrl = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))" 1101 | regEmail = r"[\w\.-]+@[\w\.-]+" 1102 | urlsInComments = [] 1103 | emailsInComments= [] 1104 | urlsInComment = [] 1105 | emailsInComment = [] 1106 | 1107 | for comment in comments: 1108 | #get urls 1109 | urlsComments = re.findall(regUrl,comment) 1110 | #parse url, does the domain the same as our current domain? 1111 | if urlsComments: 1112 | for url in urlsComments[0]: 1113 | if url: 1114 | #Get URLs 1115 | tempData = urlparse(url) 1116 | domainInUrlTemp = '{uri.netloc}'.format(uri=tempData).split('.') 1117 | domainInUrl = ".".join(domainInUrlTemp) 1118 | 1119 | #TODO: url will need to be verify if in scope when we call it : keep the URL path/file for scan 1120 | urlsInComment = re.findall(regUrl,comment) 1121 | urlsInComments.append(urlsInComment) 1122 | 1123 | #get emails 1124 | emailsInComment = re.findall(regEmail, comment) 1125 | emailsInComments.append(emailsInComment) 1126 | self._logger.debug("url in comments and email in comments:") 1127 | 1128 | #get list only 1129 | if urlsInComments and urlsInComments[0]: 1130 | if type(urlsInComment[0]) is tuple: 1131 | self._urlsInComment[data.getDomain] = urlsInComment[0] 1132 | #TODO: use email in another version? 1133 | if emailsInComments and emailsInComments[0]: 1134 | if type(emailsInComments[0]) is tuple: 1135 | emailsInComments = emailsInComments[0] 1136 | 1137 | self._logger.debug(urlsInComments) 1138 | self._logger.debug(emailsInComments) 1139 | 1140 | self._logger.info("COMMENTS DONE") 1141 | 1142 | 1143 | #TODO: finish these function to gather the information from the data.json 1144 | ''' 1145 | Function which is accessing smart list of Path to look into by the smart request function 1146 | ''' 1147 | def getSmartListPath(self): 1148 | return 1149 | 1150 | ''' 1151 | Function which is accessing smart list of file extension to look into by the smart request function 1152 | ''' 1153 | def getSmartListExt(self): 1154 | return 1155 | 1156 | ''' 1157 | Function which is accessing smart list of directories to look into by the smart request function 1158 | ''' 1159 | def getSmartDirectories(self): 1160 | return 1161 | 1162 | ''' 1163 | Function which is accessing smart list of files to look into by the smart request function 1164 | ''' 1165 | def getSmartFiles(self): 1166 | return 1167 | 1168 | ''' 1169 | This functions split all informations of the URL for further use in the smartRequest function 1170 | @param messageInfo: last request executed with all its information 1171 | ''' 1172 | def getURLdata(self,messageInfo,messageIsRequest): 1173 | 1174 | analyzedRequest = self._helpers.analyzeRequest(messageInfo) 1175 | url = analyzedRequest.getUrl() 1176 | self._logger.debug(url) 1177 | 1178 | parsed = urlparse(url.toString()) 1179 | 1180 | '''debug info 1181 | print 'scheme :', parsed.scheme 1182 | print 'netloc :', parsed.netloc 1183 | print 'path :', parsed.path 1184 | print 'params :', parsed.params 1185 | print 'query :', parsed.query 1186 | print 'fragment:', parsed.fragment 1187 | print 'username:', parsed.username 1188 | print 'password:', parsed.password 1189 | print 'hostname:', parsed.hostname, '(netloc in lower case)' 1190 | print 'port :', parsed.port 1191 | ''' 1192 | 1193 | #Is there any parameters? 1194 | params = analyzedRequest.getParameters() 1195 | 1196 | for p in params: 1197 | self._logger.debug("Query var: "+p.getName()) 1198 | self._logger.debug("Query value: "+p.getValue()) 1199 | 1200 | #getURL, needs to be a string before parsing it with urlparse 1201 | completeURL = url.toString() 1202 | self._logger.debug("Complete URL: "+completeURL) 1203 | 1204 | #URL sans port/dir/params 1205 | baseURL = messageInfo.getHttpService().toString() 1206 | self._logger.debug("Base URL: "+baseURL) 1207 | 1208 | 1209 | #Get path including directories and file extension 1210 | path = urlparse(completeURL).path.encode("utf-8") 1211 | filename = path.split('/')[-1:].pop().split('.')[:1].pop() 1212 | fileExt = path.split('.')[1:] 1213 | fileExt = "".join(fileExt) 1214 | directories = path.split('/')[1:-1] 1215 | directory = "/".join(directories) 1216 | if len(fileExt) > 0: 1217 | self._logger.debug("Directories: "+str(directories)[1:-1]) 1218 | self._logger.debug("Directory: "+directory) 1219 | self._logger.debug("File Extension: "+fileExt) 1220 | self._logger.debug("URL Path: "+path) 1221 | self._logger.debug("Filename: "+filename) 1222 | else: 1223 | self._logger.debug("No file Extension, directory is: "+path) 1224 | 1225 | #Get domain and netloc 1226 | netloc = parsed.netloc.encode("utf-8") 1227 | domain = netloc.split(':')[0] 1228 | 1229 | self._logger.debug("Domain/: "+domain) 1230 | 1231 | ''' 1232 | print "Complete URL: "+completeURL 1233 | print "Domain: "+domain 1234 | print "Netloc: "+ netloc 1235 | print "Query value: "+p.getValue() 1236 | print "Query var: "+p.getName() 1237 | print "Directories: "+str(directories)[1:-1] 1238 | print "Directories2: "+str(directories) 1239 | print "Directory: "+directory 1240 | print "File Extension: "+fileExt 1241 | print "URL Path: "+path 1242 | print "Filename: "+filename 1243 | print "Base URL: "+baseURL 1244 | ''' 1245 | 1246 | responseData = "" 1247 | if not messageIsRequest: #when it's a response, get the response data 1248 | content = messageInfo.getResponse() 1249 | response = self._helpers.analyzeResponse(content) 1250 | responseData = self._helpers.bytesToString(content[response.getBodyOffset():]) 1251 | 1252 | #data = UrlData("",headers,"","","","","","",responseData,self._logger) 1253 | 1254 | data = UrlData(url,domain,netloc,directories,params,filename,fileExt,baseURL,completeURL,path,responseData,self._logger) 1255 | return data 1256 | 1257 | # This method is called when multiple issues are reported for the same URL 1258 | # In this case we are checking if the issue detail is different, as the 1259 | # issues from our scans include affected parameters/values in the detail, 1260 | # which we will want to report as unique issue instances 1261 | def consolidateDuplicateIssues(self, existingIssue, newIssue): 1262 | if (existingIssue.getIssueDetail() == newIssue.getIssueDetail()): 1263 | return -1 1264 | else: 1265 | return 0 1266 | 1267 | #Have to be implemented 1268 | def doPassiveScan(self, baseRequestResponse): 1269 | pass 1270 | 1271 | #Have to be implemented 1272 | def doActiveScan(self, baseRequestResponse, insertionPoint): 1273 | pass 1274 | ''' 1275 | Multithreaded class to execute queries out of the Queue.Queue 1276 | 1277 | Also get the response and validate the 404 type 1278 | ''' 1279 | class RequestorWorker(threading.Thread): 1280 | 1281 | def __init__(self, threadID, name, queue, error404, logger, requestor, UI, recursiveURLs): 1282 | 1283 | #Sahred Queue between Thread Workers 1284 | self._id = threadID 1285 | self._name = name 1286 | self._queue = queue #request queue received from the Requestor 1287 | self._threadLock = threading.Lock() 1288 | self._alive = True 1289 | threading.Thread.__init__(self) 1290 | self.daemon = True 1291 | #self._responseQueue = responseQueue 1292 | self._error404 = error404 1293 | self._logger = logger 1294 | self._requestor = requestor 1295 | self._ui = UI 1296 | self._recursiveURLs = recursiveURLs 1297 | 1298 | self._acceptedCode = (200,400,401,403,500) 1299 | 1300 | #TODO: Set a randomizer of user-agent and add the option in .ini file 1301 | self._headers = { 1302 | 'User-Agent': 'Mozilla/5.0' 1303 | } 1304 | 1305 | return 1306 | 1307 | ''' 1308 | Return type of 404 for requested domain 1309 | 1310 | @param domain: domain to fetch error 404 type 1311 | ''' 1312 | def _getError404(self,url): 1313 | #Get domain and netloc 1314 | parsed = urlparse(url) 1315 | netloc = parsed.netloc.encode("utf-8") 1316 | domain = netloc.split(':')[0] 1317 | return self._error404[domain] 1318 | 1319 | def run(self): 1320 | while(self._alive): 1321 | #waiting for queue 1322 | #print "Waiting for queue: "+self._name 1323 | url = self._queue.get() 1324 | 1325 | #print "TASK RECEIVED: " + url + " From: " + self._name 1326 | 1327 | self._logger.debug(self._name+" requesting(URL): " + url) 1328 | self._logger.info(self._name+" requesting(URL): " + url) 1329 | #print "[Requesting] " + url 1330 | 1331 | #TODO: randomizedUserAgent 1332 | #TODO: - 302 (redirect) --> parse the redirect URL (in scope ok, in sitemap stop, not in site map add to queue : 200+window.location or JS isn't catch yet 1333 | 1334 | response = requests.get(url, headers=self._headers, allow_redirects=False) 1335 | 1336 | if response.status_code in self._acceptedCode: 1337 | #add no false positive to site map 1338 | code = self._getError404(url) 1339 | print "[URL EXISTS](Response: " +str(response.status_code)+ ") | 404 type:" + str(code) +" | FOR URL: "+ str(url) 1340 | 1341 | #False positive logic. 1342 | #TODO: can be update or upgraded for sure! :) 1343 | fp = "" 1344 | 1345 | 1346 | ''' 1347 | si 404 1348 | si response 200 ok 1349 | si response 401 1350 | si response 403 1351 | si response 300 1352 | si response 500 1353 | si 403 1354 | si response 200 1355 | si response 401 fp 1356 | si response 403 fp 1357 | si response 300 1358 | si response 500 fp 1359 | si 500 1360 | si response 200 1361 | si response 401 1362 | si response 403 1363 | si response 300 1364 | si response 500 fp 1365 | si intext 1366 | si response 200 need reverification fp 1367 | si response 401 1368 | si response 403 1369 | si response 300 1370 | si response 500 1371 | si 300 1372 | si response 200 1373 | si response 401 1374 | si response 403 1375 | si response 300 fp 1376 | si response 500 1377 | ''' 1378 | 1379 | #if the current request is a 403 and the 404 page isn't a 403 page, should be false positive 1380 | if response.status_code == 403 and code != 403: 1381 | fp = " ,False Positive" 1382 | #if current response is a 200 and the 404 page was inside a 200 code page, it can be a false positive 1383 | elif response.status_code == 200 and code == "404 in page": 1384 | fp = " ,False Positive" 1385 | #if 404 page is inside a 200 response code, a 300 redirect page or a 403, many possible false positive 1386 | elif code == "404 in page" or code == 300 or code == 403: 1387 | fp = " ,Possible False Positive" 1388 | #code is 200 or whatnot 1389 | else: #TODO: define all directory in a list and add to the recursive list+validate latest directory of current url to see if it is in list, if not add it 1390 | print 200 1391 | #if it's a direct directory, let's recurse... if not recurse too much already! 1392 | #if urlparse(url).path[-1] == '/' and self._recursiveURLs.get(str(url), 0) <= self._ui.getRecursiveConfig(): 1393 | # self._recursiveURLs[str(url)] = self._recursiveURLs.get(str(url), 0) + 1 #adjust the recursed level for that directory 1394 | # self._requestor.runRequest(url,Queue.Queue(1)) 1395 | 1396 | #add code to the Jlist here 1397 | print url 1398 | self._ui.addURL(url + " , ("+str(response.status_code)+")" + fp) 1399 | 1400 | 1401 | 1402 | #TODO: add page to SiteMap if not there already? 1403 | 1404 | #TODO: issue = SmartBusterIssue() 1405 | #might need to parse the url into data for the issue? 1406 | #issue=ScanIssue(baseRequestResponse.getHttpService(), self._helpers.analyzeRequest(baseRequestResponse).getUrl(), httpmsgs, ISSUE_NAME, ISSUE_DETAIL, SEVERITY, CONFIDENCE, REMEDIATION_DETAIL, ISSUE_BACKGROUND, REMEDIATION_BACKGROUND) 1407 | #self._callbacks.addScanIssue(issue) 1408 | 1409 | '''---------------------------------------------------------------------------------------------------------------------------------------- 1410 | Class to hold the Request data 1411 | 1412 | - Using Requests API we use a Queue to append HTTP requests to be executed. 1413 | - If the requests return a 200/401/403/500 we add them to the sitemap and add them to our list of URL/Dir/file found 1414 | - Can save data found to csv 1415 | ----------------------------------------------------------------------------------------------------------------------------------------''' 1416 | class Requestor(): 1417 | ''' 1418 | Initialize 1419 | 1420 | ''' 1421 | def __init__(self,logger,UI): 1422 | 1423 | #Queue to hold URL to request 1424 | #Each item will be a URL string str(URL) 1425 | self._requestQueue = Queue.Queue(0) 1426 | self._logger = logger 1427 | 1428 | #hold type of 404 error by domain 1429 | self._error404 = {} 1430 | 1431 | #hold url that are being recursive 1432 | self._recursiveURLs = [] 1433 | 1434 | #Queue to hold URL and their response code 1435 | #Each item will be a list (url,code) 1436 | #self._responseQueue = deque() 1437 | 1438 | #TODO: Set a randomizer of user-agent and add the option in .ini file 1439 | self._headers = { 1440 | 'User-Agent': 'Mozilla/5.0' 1441 | } 1442 | 1443 | self._logger.debug("Requestor object created") 1444 | 1445 | threads = [] #list containing threads 1446 | 1447 | #1 thread needed for infofestival. Don't know how to split the pages between workers 1448 | for i in range(0,40):#TODO: Set a number of thread in UI 1449 | t = RequestorWorker(i,"RequestorWorker-"+str(i),self._requestQueue,self._error404, logger, self, UI, self._recursiveURLs) 1450 | threads.append(t) 1451 | t.start() 1452 | 1453 | return 1454 | 1455 | 1456 | ''' 1457 | Add a request to the queue to be execute by a thread worker (RequestorWorker) 1458 | 1459 | @param url: the URL to get a response from 1460 | ''' 1461 | def addRequest(self,url,data): 1462 | 1463 | 1464 | #print "ADDING: "+ url 1465 | 1466 | #get the 404 details for the current domain 1467 | self._define404(data) 1468 | self._requestQueue.put(url) ##see if we can put the type404 inside the queue along with the url 1469 | return 1470 | 1471 | ''' 1472 | Define 404 type of the current domain 1473 | ''' 1474 | def _define404(self,data): 1475 | 1476 | domain = data.getDomain() 1477 | #only do once per domain 1478 | if domain not in self._error404: 1479 | 1480 | code = 404 1481 | errorQueue = Queue.Queue(0) 1482 | 1483 | #get a 404 page 1484 | m = hashlib.md5() 1485 | m.update(str(random.random())) 1486 | 1487 | url = data.getBaseUrl()+"/"+m.hexdigest() 1488 | print url 1489 | self.runRequest(url,errorQueue) 1490 | response = errorQueue.get() 1491 | 1492 | #if website use standard 404 error, everything is good 1493 | if response.status_code == 404: 1494 | code = 404 1495 | 1496 | #if website used a 3xx code 1497 | if 310 - response.status_code < 11 and 310 - response.status_code > 0: 1498 | code = 300 1499 | 1500 | if response.status_code == 403: 1501 | code = 403 1502 | 1503 | #if website use a 5xx code 1504 | if 510 - response.status_code < 11 and 510 - response.status_code > 0: 1505 | code = 500 1506 | 1507 | #if website use a 200 1508 | if response.status_code == 200: 1509 | 1510 | soup = BeautifulSoup(response.content, "html.parser") 1511 | 1512 | ################################ 1513 | #TODO: more use case to add 1514 | ################################ 1515 | if soup.findAll(text=re.compile("page not found")): 1516 | code = "404 in page" 1517 | elif soup.findAll(text=re.compile("404")): 1518 | code = "404 in page" 1519 | elif soup.findAll(text=re.compile("page does not exist")): 1520 | code = "404 in page" 1521 | elif soup.findAll(text=re.compile("error 404")): 1522 | code = "404 in page" 1523 | 1524 | #define which code is refer to a 404 1525 | self._error404[domain] = code 1526 | 1527 | return 1528 | 1529 | ''' 1530 | Run a NON DELAYED (no thread workers) request and save the url:response code to the response deque class variable 1531 | 1532 | @param url: the URL to request and get a response 1533 | @param responseQueue: thread safe queue to send the response back to the spider or other objects 1534 | ''' 1535 | def runRequest(self,url,responseQueue): 1536 | 1537 | #TODO: After thread is done, in thread read the _requestQeue object 1538 | 1539 | self._logger.debug("runRequest(URL): "+url) 1540 | self._logger.info("EXECUTING REQUEST FOR: "+url) 1541 | response = requests.get(url, headers=self._headers, allow_redirects=False) 1542 | responseQueue.put(response) 1543 | 1544 | #TODO: Get code 1545 | #TODO: add page to SiteMap if not there already? 1546 | 1547 | 1548 | self._logger.debug("runRequest done for: "+url) 1549 | 1550 | return 1551 | 1552 | #TODO randomizedUserAgent 1553 | def randomizedUserAgent(self): 1554 | return 1555 | 1556 | 1557 | 1558 | 1559 | '''---------------------------------------------------------------------------------------------------------------------------------------- 1560 | Class to hold the Spidering data 1561 | 1562 | - Based on: http://www.netinstructions.com/how-to-make-a-web-crawler-in-under-50-lines-of-python-code/ 1563 | Uses BeautifulSoup, require to download/install it. 1564 | ----------------------------------------------------------------------------------------------------------------------------------------''' 1565 | class Spider(): 1566 | 1567 | ''' 1568 | Initialize 1569 | 1570 | @param startUrl: the URL to start the spidering 1571 | 1572 | ''' 1573 | def __init__(self, data, maxPages, requestObj, logger): 1574 | self._data = data 1575 | self._words = [] 1576 | self._mergedWords = [] 1577 | self._maxPages = int(maxPages) 1578 | self._requestor = requestObj 1579 | self._queue = Queue.Queue(self._maxPages) 1580 | self._domain = data.getDomain() 1581 | self._logger = logger 1582 | self._logger.debug("Spider object created") 1583 | 1584 | ''' 1585 | Run the spidering 1586 | 1587 | @return: list of all words found 1588 | @todo: use TextBlob for other language, right now mostly only english based words will be categorized correctly. 1589 | ''' 1590 | def runSpidering(self): 1591 | 1592 | urlString = str(self._data.getUrl()) 1593 | url = URL(urlString) 1594 | 1595 | print "Spider, URL: " + urlString 1596 | #Get the words from the URL, starting with the startUrl 1597 | link_list = [urlString] 1598 | 1599 | #Counter 1600 | pagesVisited = 0 1601 | 1602 | self._logger.debug("Max pages to visit: " + str(self._maxPages)) 1603 | 1604 | while int(pagesVisited) < int(self._maxPages): 1605 | self._logger.debug("Nbr Page Visited: " + str(pagesVisited) + " / " + str(self._maxPages)) 1606 | self._logger.debug("Visiting: " + link_list[pagesVisited]) 1607 | visitingUrl = link_list[pagesVisited] 1608 | pagesVisited = pagesVisited+1 1609 | print "Visiting URL: "+visitingUrl 1610 | try: 1611 | #??? Fix the url retrieve. 1612 | #If it starts with / we add the domain to it 1613 | if self._domain not in visitingUrl: 1614 | if visitingUrl.startswith("/"): 1615 | visitingUrl = visitingUrl[1:] 1616 | #TODO: startswith /# 1617 | 1618 | link_list[pagesVisited] = self._data.getCompleteURL() + visitingUrl 1619 | visitingUrl = link_list[pagesVisited] 1620 | 1621 | #send an asynchronus HTTP request and wait for the response 1622 | thread = threading.Thread( 1623 | target=self._requestor.runRequest, 1624 | name="Thread-Spider", 1625 | args=[visitingUrl, self._queue], 1626 | ) 1627 | thread.start() 1628 | thread.join() 1629 | response = self._queue.get() 1630 | self._logger.debug("Response received from: "+visitingUrl) 1631 | 1632 | #Get the soup 1633 | soup = BeautifulSoup(response.content, "html.parser") 1634 | 1635 | #Get the visible text 1636 | [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])] 1637 | visible_texts = soup.getText()#.encode('utf-8').strip() 1638 | #Get the text blob 1639 | blob = TextBlob(visible_texts) 1640 | 1641 | #Get the words : TODO: add the 1000 value in the bsb.ini? 1642 | if len(blob.words) <= 1000: #merging 2 words and up to 1000 (cpu intensivity) 1643 | for words,tag in blob.tags: 1644 | #Get only noun and numbers 1645 | if tag.startswith("NN") or tag == "CD": 1646 | self._words.append(words) 1647 | 1648 | self._logger.debug("Size of WORDS: " + str(len(self._words))) 1649 | 1650 | #Get the links for next pages or stop 1651 | aSoup = soup.findAll("a") 1652 | if len(aSoup) > 0: 1653 | for i in aSoup: 1654 | #Do not use previous page, index or anchors 1655 | if not i['href'].startswith("#") and not i['href'] == "/" and not i['href'] in i and not i['href'].startswith("/#") and not i['href'].startswith("//"): 1656 | link_list.append(i['href']) 1657 | else: 1658 | self._logger.debug("No words on: "+visitingUrl) 1659 | break 1660 | 1661 | except KeyError: 1662 | self._logger.error("SpiderError: KeyError") 1663 | pass 1664 | except requests.exceptions.RequestException as e: 1665 | self._logger.error("SpiderError: "+e.reason) 1666 | pass 1667 | 1668 | return self._words 1669 | 1670 | ''' 1671 | Merge the obtained words from the spidering 1672 | 1673 | @return: List of all words mixed with each others 1674 | ''' 1675 | def mergeWords(self): 1676 | if len(self._words) > 1: 1677 | 1678 | #original list of words that we want to mix 1679 | listOriginal = self._words 1680 | 1681 | #merging all words together 1682 | for words in listOriginal: 1683 | for wordsToMerge in listOriginal: 1684 | self._mergedWords.append(words+wordsToMerge) 1685 | 1686 | return True 1687 | else: 1688 | return False 1689 | 1690 | 1691 | ''' 1692 | @return: List of all words mixed with each others 1693 | Note: The return words needs to be convert to utf-8 1694 | ''' 1695 | def getMergedWords(self): 1696 | return self._mergedWords 1697 | 1698 | ''' 1699 | @return: List of all words 1700 | Note: The return words needs to be convert to utf-8 1701 | ''' 1702 | def getWords(self): 1703 | return self._words 1704 | 1705 | 1706 | '''---------------------------------------------------------------------------------------------------------------------------------------- 1707 | Class to share community data to annonimized server 1708 | ----------------------------------------------------------------------------------------------------------------------------------------''' 1709 | class technologyScanner(): 1710 | 1711 | def __init__(self, optIn, logger): 1712 | self._optIn = optIn 1713 | self._logger = logger 1714 | 1715 | self._logger.debug("CommunityData Object Created") 1716 | 1717 | return 1718 | 1719 | '''---------------------------------------------------------------------------------------------------------------------------------------- 1720 | Class to share community data to annonimized server 1721 | ----------------------------------------------------------------------------------------------------------------------------------------''' 1722 | class communityData(): 1723 | 1724 | def __init__(self, optIn, logger): 1725 | self._optIn = optIn 1726 | self._logger = logger 1727 | 1728 | self._logger.debug("CommunityData Object Created") 1729 | 1730 | return 1731 | 1732 | def submitData(self,fileName,isFile): 1733 | if self._optIn: 1734 | 1735 | #prepare the request to submit to the server 1736 | if isFile: 1737 | print "Data is a file" 1738 | #data to sent is a file 1739 | else: 1740 | print "data is a directory" 1741 | #data to sent is a directory 1742 | 1743 | #contact the server 1744 | print "contacting the server with data: " + fileName 1745 | return 1746 | 1747 | '''---------------------------------------------------------------------------------------------------------------------------------------- 1748 | Class to hold the URL data in separated parts 1749 | ----------------------------------------------------------------------------------------------------------------------------------------''' 1750 | class UrlData(): 1751 | 1752 | def __init__(self,url,domain,netloc,directories,params,filename, fileExt,baseURL,completeURL,path,responseData,logger): 1753 | self._url = url 1754 | self._domain = domain 1755 | self._netloc = netloc 1756 | self._directories = directories 1757 | self._params = params 1758 | self._fileExt = fileExt 1759 | self._baseURL = baseURL 1760 | self._completeURL = completeURL 1761 | self._responseData = responseData 1762 | self._logger = logger 1763 | self._path = path 1764 | self._filename = filename 1765 | 1766 | self._logger.debug("UrlData object created") 1767 | return 1768 | 1769 | def getPath(self): 1770 | return self._path 1771 | 1772 | def getFilename(self): 1773 | return self._filename 1774 | 1775 | def getResponseHeaders(self): 1776 | if not self._url: 1777 | return self._domain 1778 | 1779 | def getResponseData(self): 1780 | return self._responseData 1781 | 1782 | def getBaseUrl(self): 1783 | return self._baseURL 1784 | 1785 | def getCompleteURL(self): 1786 | return self._completeURL 1787 | 1788 | def getUrl(self): 1789 | return self._url 1790 | 1791 | def getDomain(self): 1792 | return self._domain 1793 | 1794 | def getNetloc(self): 1795 | return self._netloc 1796 | 1797 | def getDirectories(self): 1798 | return self._directories 1799 | 1800 | def getLastDirectory(self): 1801 | if len(self._directories) > 0: 1802 | return self._directories[len(self._directories)-1] 1803 | else: 1804 | return "" 1805 | 1806 | def getParams(self): 1807 | return self._params 1808 | 1809 | def getFileExt(self): 1810 | return self._fileExt 1811 | '--------------------------------------------------------------------' 1812 | 1813 | 1814 | 1815 | '''-------------------------------------------------------------------- 1816 | Class to hold the Issues found 1817 | @TODO: see for Sitemap instead of issue or WITh issues 1818 | --------------------------------------------------------------------''' 1819 | class SmartBusterIssue(IScanIssue): 1820 | '''This is our custom IScanIssue class implementation.''' 1821 | def __init__(self, httpService, url, httpMessages, issueName, issueDetail, severity, confidence, remediationDetail, issueBackground, remediationBackground): 1822 | self._issueName = issueName 1823 | self._httpService = httpService 1824 | self._url = url 1825 | self._httpMessages = httpMessages 1826 | self._issueDetail = issueDetail 1827 | self._severity = severity 1828 | self._confidence = confidence 1829 | self._remediationDetail = remediationDetail 1830 | self._issueBackground = issueBackground 1831 | self._remediationBackground = remediationBackground 1832 | 1833 | 1834 | def getConfidence(self): 1835 | return self._confidence 1836 | 1837 | def getHttpMessages(self): 1838 | return self._httpMessages 1839 | #return None 1840 | 1841 | def getHttpService(self): 1842 | return self._httpService 1843 | 1844 | def getIssueBackground(self): 1845 | return self._issueBackground 1846 | 1847 | def getIssueDetail(self): 1848 | return self._issueDetail 1849 | 1850 | def getIssueName(self): 1851 | return self._issueName 1852 | 1853 | def getIssueType(self): 1854 | return 0 1855 | 1856 | def getRemediationBackground(self): 1857 | return self._remediationBackground 1858 | 1859 | def getRemediationDetail(self): 1860 | return self._remediationDetail 1861 | 1862 | def getSeverity(self): 1863 | return self._severity 1864 | 1865 | def getUrl(self): 1866 | return self._url 1867 | 1868 | def getHost(self): 1869 | return 'localhost' 1870 | 1871 | def getPort(self): 1872 | return int(80) 1873 | -------------------------------------------------------------------------------- /DemoLabs - BurpSmartBuster - DEF CON 2016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathetiq/BurpSmartBuster/2a5998184bae006a16620930f0b9ef3e2cfc9b2e/DemoLabs - BurpSmartBuster - DEF CON 2016.pdf -------------------------------------------------------------------------------- /DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pathetiq/BurpSmartBuster/2a5998184bae006a16620930f0b9ef3e2cfc9b2e/DerbyCon 2016 - BurpSmartBuster - Stable Talk.pdf -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Patrick Mathieu (@PathetiQ) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unsupported anymore 2 | This application is unsupported anymore, but a new "version" is coming in the next few months! :) 3 | 4 | ![BSB](http://i.imgur.com/rHToHhe.png) 5 | 6 | # BurpSmartBuster 7 | 8 | A Burp Suite content discovery plugin that add the smart into the Buster! 9 | 10 | ## Installation 11 | 12 | - Now inside Burp Suite Store 13 | - Or [See Wiki page](https://github.com/pathetiq/BurpSmartBuster/wiki) for manual installation 14 | 15 | 16 | # Features 17 | 18 | * Looks for files, directories and file extensions based on current requests received by Burp Suite 19 | * Checks for: 20 | * Directories in the current URL directories 21 | * Files in the current URL directories 22 | * Replace and add extension to current files 23 | * Add suffix and prefix to current files 24 | * Easy and documented code 25 | * Verbose and logging 26 | 27 | # Todos 28 | 29 | * In progress: Technological and environment checks (PHP, IIS, Apache, SharePoint, etc.) 30 | * In progress: Community data 31 | * Limit Thread speed 32 | * Use the spidering results for actual brute forcing 33 | 34 | ## Presentations and release date 35 | 36 | * BSB was released on August 6th 2016 at [DEF CON 24 Demolabs](https://defcon.org/html/defcon-24/dc-24-demolabs.html) in the Grand Salon. 37 | * An updated talk about the tool and it's future has been done at [Derbycon 6.0](https://www.youtube.com/watch?v=RFxUfoVgMrw). 38 | * French version of the talk given at [Hackfest.ca 2016](https://www.youtube.com/watch?v=yIC9zmKLoxg) 39 | 40 | ## Code workflow and options 41 | 42 | See the Presentation PDF. 43 | 44 | More information to come in the wiki. 45 | -------------------------------------------------------------------------------- /bsb.ini: -------------------------------------------------------------------------------- 1 | ###################################### 2 | # BurpSmartBuster Config File 3 | ###################################### 4 | 5 | 6 | #---------------------------------------------------------------------------------- 7 | # Define the number of request executed when a URL is browse from the user in Burp 8 | # 9 | # Recommended number of test is: 10 | # The greater the number, the less stealth and fast it is 11 | #---------------------------------------------------------------------------------- 12 | [NumberOfTests] 13 | Paths: 5 14 | Files: 5 15 | Extensions: 5 16 | Directories: 5 17 | 18 | #---------------------------------------------------------------------------------- 19 | # RecursiveDirs: The number of directories to test when accessing a deep down directory from a browsed URL 20 | # Ex: http://site.com/a/b/c/file.php will be "3" and test will occurs on /a/ /a/b/ and /a/b/c/ 21 | # 22 | # NumberOfPages: The number of page the Spider need to gather words from it for dynamic/smart analysis with the "smart" buster options. 23 | # Do not mix this setting for the recursive number of deepness. 24 | #---------------------------------------------------------------------------------- 25 | [Spider] 26 | RecursiveDirs: 3 27 | NumberOfPages: 5 28 | 29 | #---------------------------------------------------------------------------------- 30 | # File to use for testing directories 31 | # 32 | # local : Means it will use BsB files with context logic (on, off) 33 | # Smart : Will only use the current (browsed files) filename and directory names to test for basic predefine extension/file/dir (on, off) and data from the website 34 | # File : Push your own file (Ex: /usr/share/wordlist/list.lst) , no logic here 35 | # Spider: Use the spider to gather all info, like smart but it browse for you 36 | #---------------------------------------------------------------------------------- 37 | [Smart] 38 | Local: off 39 | Smart: on 40 | File: off 41 | Spider: off 42 | verbose = off 43 | 44 | #---------------------------------------------------------------------------------- 45 | # InScope items 46 | # IMPORTANT Not using in scope only items will trigger the spidering and multiples request on website that are not in your scope! 47 | #---------------------------------------------------------------------------------- 48 | [InScope] 49 | ScopeOnly: on 50 | 51 | #---------------------------------------------------------------------------------- 52 | # File extension to ignore inside the plugin 53 | # TODO: implement this 54 | #---------------------------------------------------------------------------------- 55 | [Ignore] 56 | FileType: gif,jpg,png,css,js,ico,woff 57 | 58 | [Technical] 59 | TrailingSlash: on 60 | -------------------------------------------------------------------------------- /data.json: -------------------------------------------------------------------------------- 1 | { 2 | "extensions":[ 3 | {"name":".doc", "description":"downloadable", "type":"default"}, 4 | {"name":".docx", "description":"downloadable", "type":"default"}, 5 | {"name":".xls", "description":"downloadable", "type":"default"}, 6 | {"name":".xlsx", "description":"downloadable", "type":"default"}, 7 | 8 | {"name":".zip", "description":"compress", "type":"default"}, 9 | {"name":".rar", "description":"compress", "type":"default"}, 10 | {"name":".tar.gz", "description":"compress", "type":"default"}, 11 | {"name":".tar", "description":"compress", "type":"default"}, 12 | {"name":".bak.zip", "description":"compress", "type":"default"}, 13 | {"name":".bak.gz", "description":"compress", "type":"default"}, 14 | {"name":".bak.z", "description":"compress", "type":"default"}, 15 | {"name":".tar.gz", "description":"compress", "type":"default"}, 16 | {"name":".tgz", "description":"compress", "type":"default"}, 17 | {"name":".7z", "description":"compress", "type":"default"}, 18 | 19 | {"name":".bak", "description":"backup", "type":"default"}, 20 | {"name":".backup", "description":"backup", "type":"default"}, 21 | {"name":".bk", "description":"backup", "type":"default"}, 22 | {"name":".bac", "description":"backup", "type":"default"}, 23 | {"name":".asd", "description":"backup", "type":"default"}, 24 | {"name":".dsa", "description":"backup", "type":"default"}, 25 | 26 | {"name":"~", "description":"autosave", "type":"default"}, 27 | {"name":".swp", "description":"autosave", "type":"default"}, 28 | {"name":".swo", "description":"autosave", "type":"default"}, 29 | {"name":".save", "description":"autosave", "type":"default"}, 30 | {"name":".asd", "description":"autosave", "type":"default"}, 31 | {"name":".autosave", "description":"autosave", "type":"default"}, 32 | {"name":"._", "description":"autosave", "type":"default"}, 33 | 34 | {"name":".sql", "description":"database", "type":"default"}, 35 | {"name":".sql.zip", "description":"database", "type":"default"}, 36 | {"name":".sql.gz", "description":"database", "type":"default"}, 37 | {"name":".sql.tar.gz", "description":"database", "type":"default"}, 38 | {"name":".sql.tar", "description":"database", "type":"default"}, 39 | {"name":".sql.rar", "description":"database", "type":"default"}, 40 | {"name":".db", "description":"database", "type":"default"}, 41 | {"name":".bd", "description":"database", "type":"default"}, 42 | {"name":".diff", "description":"database", "type":"default"}, 43 | {"name":".trn", "description":"database", "type":"default"}, 44 | {"name":".bd", "description":"database", "type":"default"}, 45 | 46 | {"name":"._Old", "description":"old", "type":"default"}, 47 | {"name":"._old", "description":"old", "type":"default"}, 48 | {"name":".old", "description":"old", "type":"default"}, 49 | 50 | {"name":".dmp", "description":"random", "type":"default"}, 51 | {"name":".sh", "description":"random", "type":"default"}, 52 | 53 | {"name":".conf", "description":"configuration", "type":"default"}, 54 | {"name":".ini", "description":"configuration", "type":"default"}, 55 | {"name":".pem", "description":"certificate", "type":"default"}, 56 | 57 | {"name":".dev", "description":"development", "type":"default"}, 58 | {"name":".phps", "description":"development", "type":"default"} 59 | ], 60 | 61 | "fileprefix":[ 62 | {"name":"~", "description":"backup", "type":"default"}, 63 | {"name":".", "description":"backup", "type":"default"}, 64 | 65 | {"name":"Old_", "description":"old", "type":"default"}, 66 | {"name":"old_", "description":"old", "type":"default"}, 67 | 68 | {"name":"Copy%20of%20", "description":"copy", "type":"default"} 69 | ], 70 | 71 | "filesuffix":[ 72 | {"name":"~", "description":"backup", "type":"default"}, 73 | 74 | {"name":"_Old", "description":"old", "type":"default"}, 75 | {"name":"_old", "description":"old", "type":"default"}, 76 | {"name":"%20-%20Copy", "description":"copy", "type":"default"}, 77 | {"name":"(1)", "description":"copy", "type":"default"}, 78 | {"name":"(2)", "description":"copy", "type":"default"}, 79 | {"name":"(3)", "description":"copy", "type":"default"}, 80 | {"name":"(4)", "description":"copy", "type":"default"}, 81 | {"name":"(copy%201)", "description":"copy", "type":"default"}, 82 | {"name":"(copy%202)", "description":"copy", "type":"default"}, 83 | {"name":"(copy%203)", "description":"copy", "type":"default"}, 84 | {"name":"(copy%204)", "description":"copy", "type":"default"}, 85 | {"name":"(copy%201)(copy%201)", "description":"copy", "type":"default"}, 86 | {"name":"%20-%20Copy", "description":"copy", "type":"default"}, 87 | {"name":"%20copy", "description":"copy", "type":"default"}, 88 | {"name":"%20(1)", "description":"copy", "type":"default"} 89 | ], 90 | 91 | "files":[ 92 | {"name":"web.config", "description":"config", "type":"default"}, 93 | {"name":"wp-config.php", "description":"config", "type":"default"}, 94 | {"name":"configuration.php", "description":"config", "type":"default"}, 95 | {"name":"LocalSettings.php", "description":"config", "type":"default"}, 96 | {"name":"mt-config.cgi", "description":"config", "type":"default"}, 97 | {"name":"settings.php", "description":"config", "type":"default"}, 98 | {"name":"setting.php", "description":"config", "type":"default"}, 99 | {"name":"setting.ini", "description":"config", "type":"default"}, 100 | {"name":"settings.ini", "description":"config", "type":"default"}, 101 | {"name":"config.ini", "description":"config", "type":"default"}, 102 | {"name":"configuration.ini", "description":"config", "type":"default"}, 103 | {"name":"configurations.ini", "description":"config", "type":"default"}, 104 | {"name":".config", "description":"config", "type":"default"}, 105 | {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"}, 106 | {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"}, 107 | {"name":"mediawiki/LocalSettings.php", "description":"config", "type":"default"}, 108 | {"name":"awstats.conf", "description":"config", "type":"default"}, 109 | 110 | {"name":".bash_history", "description":"user", "type":"default"}, 111 | {"name":".cache", "description":"user", "type":"default"}, 112 | {"name":".bashrc", "description":"user", "type":"default"}, 113 | {"name":".bash_profile", "description":"user", "type":"default"}, 114 | {"name":".mysql_history", "description":"user", "type":"default"}, 115 | {"name":".sh_history", "description":"user", "type":"default"}, 116 | {"name":".profile", "description":"user", "type":"default"}, 117 | {"name":".rhosts", "description":"user", "type":"default"}, 118 | {"name":".perf", "description":"user", "type":"default"}, 119 | {"name":".profile", "description":"user", "type":"default"}, 120 | {"name":".viminfo", "description":"user", "type":"default"}, 121 | {"name":".vimrc", "description":"user", "type":"default"}, 122 | {"name":".nano_history", "description":"user", "type":"default"}, 123 | 124 | {"name":".bzr/README", "description":"repository", "type":"bzr"}, 125 | {"name":".bzr/branch-format", "description":"repository", "type":"bzr"}, 126 | {"name":".bzr/branch/branch.conf", "description":"repository", "type":"bzr"}, 127 | {"name":".bzr/branch/format", "description":"repository", "type":"bzr"}, 128 | {"name":".bzr/branch/last-revision", "description":"repository", "type":"bzr"}, 129 | {"name":".bzr/branch/tags", "description":"repository", "type":"bzr"}, 130 | {"name":".bzr/checkout/conflicts", "description":"repository", "type":"bzr"}, 131 | {"name":".bzr/checkout/dirstate", "description":"repository", "type":"bzr"}, 132 | {"name":".bzr/checkout/format", "description":"repository", "type":"bzr"}, 133 | {"name":".bzr/checkout/merge-hashes", "description":"repository", "type":"bzr"}, 134 | {"name":".bzr/checkout/views", "description":"repository", "type":"bzr"}, 135 | {"name":".bzr/repository/format", "description":"repository", "type":"bzr"}, 136 | {"name":".bzr/repository/pack-names", "description":"repository", "type":"bzr"}, 137 | {"name":".git/HEAD", "description":"repository", "type":"git"}, 138 | {"name":".git/index", "description":"repository", "type":"git"}, 139 | {"name":".git/config", "description":"repository", "type":"git"}, 140 | {"name":".git/description", "description":"repository", "type":"git"}, 141 | {"name":".git/COMMIT_EDITMSG", "description":"repository", "type":"git"}, 142 | {"name":".svn/entries", "description":"repository", "type":"svn"}, 143 | {"name":".hg/store/00manifest.i", "description":"repository", "type":"hg"}, 144 | {"name":".hg/store/00manifest.d", "description":"repository", "type":"hg"}, 145 | {"name":".hg/store/00changelog.i", "description":"repository", "type":"hg"}, 146 | {"name":".hg/store/00changelog.d", "description":"repository", "type":"hg"}, 147 | {"name":".hg/dirstate", "description":"repository", "type":"hg"}, 148 | {"name":".hg/requires", "description":"repository", "type":"hg"}, 149 | {"name":"CVS/Repository", "description":"repository", "type":"CVS"}, 150 | {"name":"CVS/Entries", "description":"repository", "type":"CVS"}, 151 | {"name":"CVS/Root", "description":"repository", "type":"CVS"}, 152 | {"name":"CVS/fileattr.xml", "description":"repository", "type":"CVS"}, 153 | 154 | {"name":".listings", "description":"filelist", "type":"default"}, 155 | {"name":".listing", "description":"filelist", "type":"default"}, 156 | {"name":"thumbs.db", "description":"filelist", "type":"default"}, 157 | 158 | {"name":"access.log", "description":"log", "type":"log"}, 159 | {"name":"error.log", "description":"log", "type":"log"}, 160 | {"name":"development.log", "description":"log", "type":"log"}, 161 | {"name":"dev.log", "description":"log", "type":"log"}, 162 | {"name":"production.log", "description":"log", "type":"log"}, 163 | {"name":"prod.log", "description":"log", "type":"log"}, 164 | 165 | {"name":".htpasswd", "description":"access", "type":"default"}, 166 | {"name":".htaccess", "description":"access", "type":"default"}, 167 | {"name":"id_rsa", "description":"access", "type":"default"}, 168 | {"name":"id_dsa", "description":"access", "type":"default"}, 169 | 170 | {"name":"dump.zip", "description":"dump", "type":"default"}, 171 | {"name":"db.zip", "description":"dump", "type":"default"}, 172 | {"name":"database.zip", "description":"dump", "type":"default"}, 173 | {"name":"database.tar", "description":"dump", "type":"default"}, 174 | {"name":"database.tar.gz", "description":"dump", "type":"default"}, 175 | {"name":"database.rar", "description":"dump", "type":"default"}, 176 | {"name":"dump.zip", "description":"dump", "type":"default"}, 177 | {"name":"db_full.zip", "description":"dump", "type":"default"}, 178 | {"name":"db_full.tar", "description":"dump", "type":"default"}, 179 | {"name":"db_full.tar.gz", "description":"dump", "type":"default"}, 180 | {"name":"db_full.rar", "description":"dump", "type":"default"}, 181 | 182 | {"name":"test.php", "description":"random", "type":"default"}, 183 | {"name":"test.txt", "description":"random", "type":"default"}, 184 | {"name":"test.asp", "description":"random", "type":"default"}, 185 | {"name":"test.aspx", "description":"random", "type":"default"}, 186 | {"name":"test.asmx", "description":"random", "type":"default"}, 187 | 188 | {"name":"backup", "description":"backup", "type":"backup"}, 189 | {"name":"bak", "description":"backup", "type":"backup"}, 190 | {"name":"pass.db", "description":"passwords", "type":"passwords"}, 191 | {"name":"password", "description":"passwords", "type":"passwords"}, 192 | {"name":"passwd", "description":"passwords", "type":"passwords"}, 193 | {"name":".passwd", "description":"passwords", "type":"passwords"}, 194 | {"name":"pwd", "description":"passwords", "type":"passwords"}, 195 | {"name":".pwd", "description":"passwords", "type":"passwords"}, 196 | {"name":"password", "description":"passwords", "type":"passwords"}, 197 | {"name":"passwords", "description":"passwords", "type":"passwords"}, 198 | 199 | {"name":"xmlrpc.php", "description":"development", "type":"default"}, 200 | {"name":"info.php", "description":"development", "type":"default"}, 201 | {"name":"phpinfo.php", "description":"development", "type":"default"}, 202 | {"name":"infophp.php", "description":"development", "type":"default"}, 203 | {"name":"infos.php", "description":"development", "type":"default"}, 204 | {"name":"php.php", "description":"development", "type":"default"} 205 | ], 206 | 207 | "directories":[ 208 | {"name":".hg", "description":"repository", "type":"hg"}, 209 | {"name":".git", "description":"repository", "type":"git"}, 210 | {"name":".svn", "description":"repository", "type":"svn"}, 211 | {"name":".bzr", "description":"repository", "type":"bzr"}, 212 | {"name":"CVS", "description":"repository", "type":"CVS"}, 213 | {"name":"CVSROOT", "description":"repository", "type":"CVS"}, 214 | {"name":"cvsroot", "description":"repository", "type":"CVS"}, 215 | 216 | {"name":"conf", "description":"config", "type":"default"}, 217 | {"name":"config", "description":"config", "type":"default"}, 218 | {"name":"configuration", "description":"config", "type":"default"}, 219 | {"name":"configurations", "description":"config", "type":"default"}, 220 | {"name":"setting", "description":"config", "type":"default"}, 221 | {"name":"settings", "description":"config", "type":"default"}, 222 | 223 | {"name":"manage", "description":"admin", "type":"default"}, 224 | {"name":"management", "description":"admin", "type":"default"}, 225 | {"name":"admin", "description":"admin", "type":"default"}, 226 | {"name":"administration", "description":"admin", "type":"default"}, 227 | 228 | {"name":"backup", "description":"backup", "type":"backup"}, 229 | {"name":"bac", "description":"backup", "type":"backup"}, 230 | {"name":"back", "description":"backup", "type":"backup"}, 231 | {"name":"bak", "description":"backup", "type":"backup"}, 232 | {"name":"export", "description":"backup", "type":"backup"}, 233 | { 234 | "name":"save", "description":"backup", "type":"backup"}, 235 | {"name":"saved", "description":"backup", "type":"backup"}, 236 | 237 | {"name":"log", "description":"admin", "type":"log"}, 238 | {"name":"logs", "description":"admin", "type":"log"}, 239 | {"name":"logging", "description":"admin", "type":"log"}, 240 | 241 | {"name":"pass", "description":"passwords", "type":"passwords"}, 242 | {"name":"password", "description":"passwords", "type":"passwords"}, 243 | {"name":"passwd", "description":"passwords", "type":"passwords"}, 244 | {"name":"pwd", "description":"passwords", "type":"passwords"}, 245 | {"name":"passwords", "description":"passwords", "type":"passwords"}, 246 | 247 | 248 | {"name":"confidential", "description":"privacy", "type":"default"}, 249 | {"name":"confidentiel", "description":"privacy", "type":"default"}, 250 | 251 | {"name":".ssh", "description":"access", "type":"default"}, 252 | 253 | {"name":"httpd", "description":"webserver", "type":"default"} 254 | ] 255 | 256 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bs4==0.0.1 2 | nltk==3.1 3 | requests==2.10.0 4 | textblob==0.11.0 5 | tld==0.7.6 6 | --------------------------------------------------------------------------------